User: sits
Date: 06/05/30 00:18:09
Modified: . codestriker.conf
bin install.pl
lib Codestriker.pm
lib/Codestriker/Action SubmitNewTopic.pm
lib/Codestriker/FileParser Parser.pm
lib/Codestriker/Model Delta.pm
Log:
Can now review topic text which is encoded in something other than
UTF-8. The $topic_text_encoding configuration variable defines what
the expected encoding should be - it defaults to utf8 if it is not
set. I tried using Encode::Guess, but this turned out to be
unreliable.
This could potentially be an option on the create topic page, but I
am guessing most sites use the same encoding for all their topics.
Most people should use utf8 these days anyway.
Index: codestriker.conf
===================================================================
RCS file: /cvsroot/codestriker/codestriker/codestriker.conf,v
retrieving revision 1.80
retrieving revision 1.81
diff -u -r1.80 -r1.81
--- codestriker.conf 18 Apr 2006 10:45:39 -0000 1.80
+++ codestriker.conf 30 May 2006 07:18:09 -0000 1.81
@@ -380,6 +380,12 @@
# }
};
+# Character encoding to use when reading topic text. Default is UTF-8
+# but this can be over-ridden here. List of example encoding names
+# can be seen here: http://perldoc.perl.org/Encode/Supported.html.
+#$topic_text_encoding = 'utf8';
+#$topic_text_encoding = 'gb2312';
+
# Each comment thread (or issue) that is created against a specific
# code line in Codestriker can have a configurable number of
# user-defined metrics recorded against it.
Index: install.pl
===================================================================
RCS file: /cvsroot/codestriker/codestriker/bin/install.pl,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- install.pl 22 May 2006 11:39:39 -0000 1.5
+++ install.pl 30 May 2006 07:18:09 -0000 1.6
@@ -99,6 +99,16 @@
name => 'XML::RSS',
version => '1.05',
optional => 1
+ },
+ {
+ name => 'Encode::Byte',
+ version => '0',
+ optional => 0
+ },
+ {
+ name => 'Encode::Unicode',
+ version => '0',
+ optional => 0
}
];
@@ -111,6 +121,22 @@
push @{$modules}, $database->get_module_dependencies();
};
+# Check for various character encoding modules that are required.
+if (defined $Codestriker::topic_text_encoding) {
+ if ($Codestriker::topic_text_encoding =~ /euc\-cn|gb2312|hz|gbk/) {
+ push @{$modules}, { name => 'Encode::CN', version => '0' };
+ }
+ if ($Codestriker::topic_text_encoding =~ /jp|jis/) {
+ push @{$modules}, { name => 'Encode::JP', version => '0' };
+ }
+ if ($Codestriker::topic_text_encoding =~ /kr|johab/) {
+ push @{$modules}, { name => 'Encode::KR', version => '0' };
+ }
+ if ($Codestriker::topic_text_encoding =~ /big5/) {
+ push @{$modules}, { name => 'Encode::TW', version => '0' };
+ }
+}
+
my %missing_optional = ();
my %missing = ();
foreach my $module (@{$modules}) {
Index: Codestriker.pm
===================================================================
RCS file: /cvsroot/codestriker/codestriker/lib/Codestriker.pm,v
retrieving revision 1.88
retrieving revision 1.89
diff -u -r1.88 -r1.89
--- Codestriker.pm 29 May 2006 23:19:36 -0000 1.88
+++ Codestriker.pm 30 May 2006 07:18:09 -0000 1.89
@@ -24,11 +24,11 @@
$allow_projects $antispam_email $VERSION $title $BASEDIR
$metric_config $tmpdir @metric_schema $comment_state_metrics
$project_states $rss_enabled $repository_name_map
$repository_url_map
- @valid_repository_names
+ @valid_repository_names $topic_text_encoding
);
# Version of Codestriker.
-$Codestriker::VERSION = "1.9.2-alpha-6";
+$Codestriker::VERSION = "1.9.2-alpha-7";
# Default title to display on each Codestriker screen.
$Codestriker::title = "Codestriker $Codestriker::VERSION";
Index: SubmitNewTopic.pm
===================================================================
RCS file: /cvsroot/codestriker/codestriker/lib/Codestriker/Action/SubmitNewTopic.pm,v
retrieving revision 1.27
retrieving revision 1.28
diff -u -r1.27 -r1.28
--- SubmitNewTopic.pm 26 May 2006 05:42:48 -0000 1.27
+++ SubmitNewTopic.pm 30 May 2006 07:18:09 -0000 1.28
@@ -13,6 +13,8 @@
use File::Temp qw/ tempfile /;
use FileHandle;
+use Encode;
+
use Codestriker::Model::Topic;
use Codestriker::Http::Render;
use Codestriker::Repository::RepositoryFactory;
@@ -215,13 +217,20 @@
$fh = $temp_topic_fh;
}
+ # Assume the topic text input file is set to UTF8 by default, unless
+ # it has been explicitly over-ridden in the codestriker.conf file.
+ my $encoding = 'utf8';
+ if (defined $Codestriker::topic_text_encoding &&
+ $Codestriker::topic_text_encoding ne '') {
+ $encoding = $Codestriker::topic_text_encoding;
+ }
+
my @deltas = ();
if ($feedback eq "") {
# Try to parse the topic text into its diff chunks.
- binmode $fh, ':utf8';
@deltas =
Codestriker::FileParser::Parser->parse($fh, "text/plain",
$repository,
- $topicid, $topic_file);
+ $topicid, $topic_file, $encoding);
if ($#deltas == -1) {
# Nothing in the file, report an error.
$feedback .= "Reviewable text in topic is empty.\n";
@@ -241,7 +250,7 @@
# If the topic text has been uploaded from a file, read from it now.
if (defined $fh) {
while (<$fh>) {
- $topic_text .= $_;
+ $topic_text .= decode($encoding, $_);
}
if ($topic_text eq "") {
if (defined $temp_error_fh) {
Index: Parser.pm
===================================================================
RCS file: /cvsroot/codestriker/codestriker/lib/Codestriker/FileParser/Parser.pm,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -r1.20 -r1.21
--- Parser.pm 19 May 2006 00:40:15 -0000 1.20
+++ Parser.pm 30 May 2006 07:18:09 -0000 1.21
@@ -17,6 +17,8 @@
use FileHandle;
use File::Temp qw/ tempfile /;
+use Encode;
+
use Codestriker::FileParser::CvsUnidiff;
use Codestriker::FileParser::SubversionDiff;
use Codestriker::FileParser::PerforceDescribe;
@@ -29,9 +31,9 @@
# Given the content-type and the file handle, try to determine what files,
# lines, revisions and diffs have been submitted in this review.
-sub parse ($$$$$) {
+sub parse ($$$$$$) {
my ($type, $fh, $content_type, $repository, $topicid,
- $uploaded_filename) = @_;
+ $uploaded_filename, $encoding) = @_;
# Diffs found.
my @diffs = ();
@@ -49,14 +51,15 @@
else {
$tmpfh = tempfile();
}
-# binmode $tmpfh, ':utf8';
+ binmode $tmpfh, ':utf8';
if (!$tmpfh) {
die "Unable to create temporary parse file: $!";
}
+ binmode $fh;
while (<$fh>) {
- my $line = $_;
+ my $line = decode($encoding, $_);
$line =~ s/\r\n/\n/go;
print $tmpfh $line;
}
Index: Delta.pm
===================================================================
RCS file: /cvsroot/codestriker/codestriker/lib/Codestriker/Model/Delta.pm,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- Delta.pm 26 May 2006 05:42:49 -0000 1.10
+++ Delta.pm 30 May 2006 07:18:09 -0000 1.11
@@ -25,7 +25,7 @@
$self->{binary} = $_[3];
$self->{old_linenumber} = $_[4];
$self->{new_linenumber} = $_[5];
- $self->{text} = $_[6];
+ $self->{text} = decode_utf8($_[6]);
$self->{description} = (defined $_[7]) ? decode_utf8($_[7]) : "";
$self->{filenumber} = $_[8];
$self->{repmatch} = $_[9];
_______________________________________________
Codestriker-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/codestriker-commits