diff --git a/lib/HTML/Form.pm b/lib/HTML/Form.pm
index 2a10024..b49d18f 100644
--- a/lib/HTML/Form.pm
+++ b/lib/HTML/Form.pm
@@ -4,9 +4,12 @@ use strict;
use URI;
use Carp ();
-use vars qw($VERSION);
+use vars qw($VERSION $Encode_available);
$VERSION = "5.817";
+eval { require Encode };
+$Encode_available = !$@;
+
my %form_tags = map {$_ => 1} qw(input textarea button select option);
my %type2class = (
@@ -151,6 +154,7 @@ sub parse
$f = $class->new($attr->{'method'},
$action,
$attr->{'enctype'});
+ $f->accept_charset($attr->{'accept-charset'}) if $attr->{'accept-charset'};
$f->{attr} = $attr;
$f->strict(1) if $strict;
%openselect = ();
@@ -271,6 +275,7 @@ sub new {
$self->{method} = uc(shift || "GET");
$self->{action} = shift || Carp::croak("No action defined");
$self->{enctype} = lc(shift || "application/x-www-form-urlencoded");
+ $self->{accept_charset} = "UNKNOWN";
$self->{inputs} = [@_];
$self;
}
@@ -317,11 +322,22 @@ I to.
This method gets/sets the encoding type for the form data. It is a
string like "application/x-www-form-urlencoded" or "multipart/form-data".
+=item $accept = $form->accept_charset
+
+=item $form->accept_charset( $new_accept )
+
+This method gets/sets the list of charset encodings that the server
+processing the form accepts. Current implementation supports only
+one-element lists. Default value is "UNKNOWN" which we interpret as a
+request to use UTF-8 encoding. To encode character strings you should
+have modern perl with Encode module. On older perls this method has no
+effect.
+
=cut
BEGIN {
# Set up some accesor
- for (qw(method action enctype)) {
+ for (qw(method action enctype accept_charset)) {
my $m = $_;
no strict 'refs';
*{$m} = sub {
@@ -641,6 +657,13 @@ sub make_request
my $enctype = $self->{'enctype'};
my @form = $self->form;
+ my $charset = $self->accept_charset eq "UNKNOWN" ? 'utf-8' : $self->accept_charset;
+ if ($Encode_available) {
+ foreach my $fi (@form) {
+ $fi = Encode::encode($charset, $fi) if utf8::is_utf8($fi);
+ }
+ }
+
if ($method eq "GET") {
require HTTP::Request;
$uri = URI->new($uri, "http");
diff --git a/t/html/form-unicode.t b/t/html/form-unicode.t
new file mode 100644
index 0000000..b574c9c
--- /dev/null
+++ b/t/html/form-unicode.t
@@ -0,0 +1,73 @@
+#!perl -w
+
+use strict;
+use Test qw(plan ok skip);
+
+eval { require Encode };
+if ($@) {
+ plan tests => 1;
+ skip('Skip: Encode not available', 0);
+ exit;
+}
+else {
+ plan tests => 15;
+}
+
+use HTML::Form;
+
+my @warn;
+$SIG{__WARN__} = sub { push(@warn, $_[0]) };
+
+my $f = HTML::Form->parse(<<'EOT', "http://localhost/");
+
+
+EOT
+
+ok($f->value("name"), "");
+ok($f->accept_charset, "UNKNOWN");
+my $req = $f->click;
+ok($req->uri, "http://localhost/abc?name=");
+
+$f->value(name => "\x{0424}"); # capital cyrillic ef
+$req = $f->click;
+ok($req->method, "GET");
+ok($req->uri, "http://localhost/abc?name=%D0%A4");
+
+$f->method('POST');
+$f->enctype('multipart/form-data');
+
+$req = $f->click;
+ok($req->uri, "http://localhost/abc");
+ok($req->content, "--xYzZY\r\nContent-Disposition: form-data; name=\"name\"\r\n\r\n\xD0\xA4\r\n--xYzZY--\r\n");
+
+$f->accept_charset('koi8-r');
+$req = $f->click;
+ok($req->uri, "http://localhost/abc");
+ok($req->content, "--xYzZY\r\nContent-Disposition: form-data; name=\"name\"\r\n\r\n\xE6\r\n--xYzZY--\r\n");
+
+$f->method('GET');
+$req = $f->click;
+ok($req->uri, "http://localhost/abc?name=%E6");
+
+$f = HTML::Form->parse(<<'EOT', "http://localhost/");
+
+
+EOT
+
+ok($f->accept_charset, 'koi8-r');
+
+$f->value(name => "\x{0425}"); # capital cyrillic kha
+$req = $f->click;
+ok($req->method, "GET");
+ok($req->uri, "http://localhost/abc?name=%E8");
+
+$f->method('POST');
+$f->enctype('multipart/form-data');
+
+$req = $f->click;
+ok($req->uri, "http://localhost/abc");
+ok($req->content, "--xYzZY\r\nContent-Disposition: form-data; name=\"name\"\r\n\r\n\xE8\r\n--xYzZY--\r\n");