cvs commit: modperl/t/net/perl util.pl
stas2003/02/19 22:48:05 Modified:t/net/perl util.pl Log: add tests for Apache::unescape_url Revision ChangesPath 1.16 +20 -1 modperl/t/net/perl/util.pl Index: util.pl === RCS file: /home/cvs/modperl/t/net/perl/util.pl,v retrieving revision 1.15 retrieving revision 1.16 diff -u -r1.15 -r1.16 --- util.pl 19 Jun 2002 16:31:52 - 1.15 +++ util.pl 20 Feb 2003 06:48:04 - 1.16 @@ -3,7 +3,7 @@ use Apache::test; $|++; my $i = 0; -my $tests = 7; +my $tests = 9; my $r = shift; $r-send_http_header('text/plain'); @@ -100,6 +100,25 @@ Perl = sub { my $esc = URI::Escape::uri_escape($uri) }, }); =cut + +{ +my $str = aa%20dd%2epl; +my $expected = aa dd.pl; +my $received = Apache::unescape_url($str); +test ++$i, $received eq $expected; +print expected: $expected\n; +print received: $received\n; +} + +{ +my $str = undef; +my $expected = ; +no warnings; +my $received = Apache::unescape_url($str); +test ++$i, $received eq $expected; +print expected: $expected\n; +print received: $received\n; +} $C = Apache::Util::ht_time(); $Perl = HTTP::Date::time2str();
cvs commit: modperl/t/net/perl util.pl
stas2003/02/19 23:09:14 Modified:t/net/perl util.pl Log: s/no warnings/local $^W = 0/, I forgot that we have to deal with perl 5.6 on the mod_perl 1.0 land. Revision ChangesPath 1.17 +1 -1 modperl/t/net/perl/util.pl Index: util.pl === RCS file: /home/cvs/modperl/t/net/perl/util.pl,v retrieving revision 1.16 retrieving revision 1.17 diff -u -r1.16 -r1.17 --- util.pl 20 Feb 2003 06:48:04 - 1.16 +++ util.pl 20 Feb 2003 07:09:14 - 1.17 @@ -113,7 +113,7 @@ { my $str = undef; my $expected = ; -no warnings; +local $^W = 0; my $received = Apache::unescape_url($str); test ++$i, $received eq $expected; print expected: $expected\n;
Re: cvs commit: modperl/t/net/perl util.pl
A casual user won't understand that documentation... Hell, I'm not even sure I completely understand the implications of it and when to use/not use escape_html based on it... I think an example is called for, but not in the POD... Maybe in the Guide? Issac Eric Cholet wrote: --On Sunday, March 24, 2002 21:57:54 + [EMAIL PROTECTED] wrote: dougm 02/03/24 13:57:53 Modified:.Changes STATUS src/modules/perl Util.xs t/net/perl util.pl Log: Submitted by: Geoff Young [EMAIL PROTECTED] Reviewed by:dougm properly escape highbit chars in Apache::Utils::escape_html This is uncool for those of us using a non-ASCII encoding and sending out lots of characters with the 8th bit set, e.g. in a French page many accented characters will be replaced by 6-byte sequences. If I'm sending out Content-type: text/html; charset=ISO-8859-1, and calling escape_html to escape '', '' and the like, I'm going to be serving quite a lot more bytes than before this patch. However escape_html () has no clue as to what the character set is, and whether it has been correctly specified in the Content-Type. It has also be mentionned here that escape_html is only valid for single-byte encodings. So this patch does the right thing to escape the odd 8 bit char in a mostly ASCII output, but users of other charsets should be warned not to use it. I use HTML::Entities::encode($_[0], '') myself. Therefore I propose a doc patch to clear this up: Index: Util.pm === RCS file: /home/cvs/modperl/Util/Util.pm,v retrieving revision 1.8 diff -u -r1.8 Util.pm --- Util.pm4 Mar 2000 20:55:47 -1.8 +++ Util.pm25 Mar 2002 18:19:37 - @@ -68,6 +68,13 @@ my $esc = Apache::Util::escape_html($html); +This function is unaware of its argument's character set and encoding. +It assumes a single-byte encoding and escapes all characters with the +8th bit set. Do not use it with multi-byte encodings such as utf8. +When using a single byte non-ASCII encoding such as ISO-8859-1, +consider specifying the character set in the Content-Type header, +and using HTML::Entities to avoid unnecessary escaping. + =item escape_uri This function replaces all unsafe characters in the $string with their -- Eric Cholet - To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
cvs commit: modperl/t/net/perl util.pl
dougm 02/03/26 06:52:28 Modified:.Changes t/net/perl util.pl Log: Submitted by: Tatsuhiko Miyagawa Reviewed by: dougm fix bug in modules/util test Revision ChangesPath 1.640 +3 -0 modperl/Changes Index: Changes === RCS file: /home/cvs/modperl/Changes,v retrieving revision 1.639 retrieving revision 1.640 diff -u -r1.639 -r1.640 --- Changes 25 Mar 2002 18:45:23 - 1.639 +++ Changes 26 Mar 2002 14:52:28 - 1.640 @@ -10,6 +10,9 @@ =item 1.26_01-dev +fix bug in modules/util test +[Tatsuhiko Miyagawa [EMAIL PROTECTED]] + Adjust the mailling list addresses 's|\@apache\.org|\@perl.apache.org|' [Stas Bekman [EMAIL PROTECTED]] 1.14 +1 -1 modperl/t/net/perl/util.pl Index: util.pl === RCS file: /home/cvs/modperl/t/net/perl/util.pl,v retrieving revision 1.13 retrieving revision 1.14 diff -u -r1.13 -r1.14 --- util.pl 25 Mar 2002 18:45:23 - 1.13 +++ util.pl 26 Mar 2002 14:52:28 - 1.14 @@ -152,7 +152,7 @@ Perl: 21 secs (20.57 usr 0.14 sys = 20.71 cpu) =cut -my $date_str = Sat, 18 Jul 1998 08:38:00 -0700; +my $date_str = Sat, 18 Jul 1998 08:38:00 GMT; test ++$i, Apache::Util::parsedate($date_str);
Re: cvs commit: modperl/t/net/perl util.pl
--On Sunday, March 24, 2002 21:57:54 + [EMAIL PROTECTED] wrote: dougm 02/03/24 13:57:53 Modified:.Changes STATUS src/modules/perl Util.xs t/net/perl util.pl Log: Submitted by: Geoff Young [EMAIL PROTECTED] Reviewed by:dougm properly escape highbit chars in Apache::Utils::escape_html This is uncool for those of us using a non-ASCII encoding and sending out lots of characters with the 8th bit set, e.g. in a French page many accented characters will be replaced by 6-byte sequences. If I'm sending out Content-type: text/html; charset=ISO-8859-1, and calling escape_html to escape '', '' and the like, I'm going to be serving quite a lot more bytes than before this patch. However escape_html () has no clue as to what the character set is, and whether it has been correctly specified in the Content-Type. It has also be mentionned here that escape_html is only valid for single-byte encodings. So this patch does the right thing to escape the odd 8 bit char in a mostly ASCII output, but users of other charsets should be warned not to use it. I use HTML::Entities::encode($_[0], '') myself. Therefore I propose a doc patch to clear this up: Index: Util.pm === RCS file: /home/cvs/modperl/Util/Util.pm,v retrieving revision 1.8 diff -u -r1.8 Util.pm --- Util.pm 4 Mar 2000 20:55:47 - 1.8 +++ Util.pm 25 Mar 2002 18:19:37 - @@ -68,6 +68,13 @@ my $esc = Apache::Util::escape_html($html); +This function is unaware of its argument's character set and encoding. +It assumes a single-byte encoding and escapes all characters with the +8th bit set. Do not use it with multi-byte encodings such as utf8. +When using a single byte non-ASCII encoding such as ISO-8859-1, +consider specifying the character set in the Content-Type header, +and using HTML::Entities to avoid unnecessary escaping. + =item escape_uri This function replaces all unsafe characters in the $string with their -- Eric Cholet
Re: cvs commit: modperl/t/net/perl util.pl
i had a bad feeling about this. we should not be implementing escape_html to begin with, the functionality should all be in apache. i'm going to back out the patch. anybody care to make a doc patch to explain the problems with escape_html before the patch went in? thanks.
Re: cvs commit: modperl/t/net/perl util.pl
Doug MacEachern wrote: i had a bad feeling about this. we should not be implementing escape_html to begin with, the functionality should all be in apache. i'm going to back out the patch. sounds wise, especially considering people like Eric will end up with larger pages as a result, while the patch fixes a rather obscure vunerability, for which other solutions (HTML::Entities) are available. anybody care to make a doc patch to explain the problems with escape_html before the patch went in? I nominate robin, since I forget how it came up in the first place :) IIRC is was due to this post http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2000-03/msg00750.html and specifically an exploit involving browsers incorrectly assuming 0x8b as a and 0x9b as a , thus creating a way around escape_html(). Robin, does that accurately summarize it? it's been far too long for me :) --Geoff
Re: cvs commit: modperl/t/net/perl util.pl
--On Monday, March 25, 2002 10:29:11 -0800 Doug MacEachern [EMAIL PROTECTED] wrote: i had a bad feeling about this. we should not be implementing escape_html to begin with, the functionality should all be in apache. i'm going to back out the patch. anybody care to make a doc patch to explain the problems with escape_html before the patch went in? thanks. I believe the patch is useful though, in cases where the charset is not explicitely specified and there's an odd character with the 8th bit set it will now do the right thing. I guess a lot of US coders would fall in that situation... I suppose it's faster than HTML::Entities (I haven't benchmarked it though). So I suspect the patch will fix more situations than it breaks: if using a single-byte non-ASCII encoding, it doesn't actually break anything, just adds bloat. If using a multi-byte encoding escape_html was broken/inapplicable already. -- Eric Cholet
cvs commit: modperl/t/net/perl util.pl
dougm 02/03/25 10:45:23 Modified:.Changes src/modules/perl Util.xs t/net/perl util.pl Log: backing out change: properly escape highbit chars in Apache::Utils::escape_html Revision ChangesPath 1.639 +0 -4 modperl/Changes Index: Changes === RCS file: /home/cvs/modperl/Changes,v retrieving revision 1.638 retrieving revision 1.639 diff -u -r1.638 -r1.639 --- Changes 25 Mar 2002 02:57:59 - 1.638 +++ Changes 25 Mar 2002 18:45:23 - 1.639 @@ -26,10 +26,6 @@ properly deal with $r-status codes (e.g. redirect) in Apache::RegistryNG [Geoff Young [EMAIL PROTECTED]] -properly escape highbit chars in Apache::Utils::escape_html -[Geoff Young [EMAIL PROTECTED], -Robin Berjon [EMAIL PROTECTED]] - allow $r-auth_name and $r-auth_type to be set on win32 [John Kelly [EMAIL PROTECTED]] 1.11 +1 -8 modperl/src/modules/perl/Util.xs Index: Util.xs === RCS file: /home/cvs/modperl/src/modules/perl/Util.xs,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- Util.xs 24 Mar 2002 21:57:53 - 1.10 +++ Util.xs 25 Mar 2002 18:45:23 - 1.11 @@ -32,9 +32,6 @@ return sv; } -#define IS_HIGHBIT_CHAR(b) \ - ( (((unsigned char)(b)) 126) (((unsigned char)(b)) = 255) ) - static SV *my_escape_html(char *s) { int i, j; @@ -46,7 +43,7 @@ j += 3; else if (s[i] == '') j += 4; -else if (s[i] == '' || IS_HIGHBIT_CHAR(s[i])) +else if (s[i] == '') j += 5; if (j == 0) @@ -70,10 +67,6 @@ memcpy(SvPVX(x)[j], quot;, 6); j += 5; } -else if (IS_HIGHBIT_CHAR(s[i])) { -sprintf(SvPVX(x)[j], #%d;, (unsigned char)s[i]); -j += 5; -} else SvPVX(x)[j] = s[i]; 1.13 +1 -5 modperl/t/net/perl/util.pl Index: util.pl === RCS file: /home/cvs/modperl/t/net/perl/util.pl,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- util.pl 24 Mar 2002 21:57:53 - 1.12 +++ util.pl 25 Mar 2002 18:45:23 - 1.13 @@ -2,7 +2,7 @@ use Apache::test; $|++; my $i = 0; -my $tests = 8; +my $tests = 7; my $r = shift; $r-send_http_header('text/plain'); @@ -61,10 +61,6 @@ /body /html EOF - -#XXX: this test could be more robust, but its better than nothing -my $c = Apache::Util::escape_html(\x8b); -test ++$i, $c =~ /^\#\d{3,3}\;$/; my $txt = No html tags in here at all; my $etxt = Apache::Util::escape_html($txt);
cvs commit: modperl/t/net/perl util.pl
dougm 02/03/24 13:57:53 Modified:.Changes STATUS src/modules/perl Util.xs t/net/perl util.pl Log: Submitted by: Geoff Young [EMAIL PROTECTED] Reviewed by: dougm properly escape highbit chars in Apache::Utils::escape_html Revision ChangesPath 1.633 +4 -0 modperl/Changes Index: Changes === RCS file: /home/cvs/modperl/Changes,v retrieving revision 1.632 retrieving revision 1.633 diff -u -r1.632 -r1.633 --- Changes 24 Mar 2002 21:00:05 - 1.632 +++ Changes 24 Mar 2002 21:57:53 - 1.633 @@ -10,6 +10,10 @@ =item 1.26_01-dev +properly escape highbit chars in Apache::Utils::escape_html +[Geoff Young [EMAIL PROTECTED], +Robin Berjon [EMAIL PROTECTED]] + allow $r-auth_name and $r-auth_type to be set on win32 [John Kelly [EMAIL PROTECTED]] 1.9 +1 -7 modperl/STATUS Index: STATUS === RCS file: /home/cvs/modperl/STATUS,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- STATUS24 Mar 2002 21:00:20 - 1.8 +++ STATUS24 Mar 2002 21:57:53 - 1.9 @@ -1,5 +1,5 @@ mod_perl 1.3 STATUS: - Last modified at [$Date: 2002/03/24 21:00:20 $] + Last modified at [$Date: 2002/03/24 21:57:53 $] Release: @@ -20,12 +20,6 @@ Report: http://marc.theaimsgroup.com/?l=apache-modperlm=97449481013350w=2 Status: doc patch at http://marc.theaimsgroup.com/?l=apache-modperlm=97450363501652w=2 - -* Apache::Utils::escape_html issues -Report: http://marc.theaimsgroup.com/?l=apache-modperlm=101180404809059w=2 - Status: - patch available - http://marc.theaimsgroup.com/?l=apache-modperl-devm=101188287032621w=2 * Apache::RegistryNG issues Report: http://marc.theaimsgroup.com/?l=apache-modperl-devm=101240123609773w=2 1.10 +8 -1 modperl/src/modules/perl/Util.xs Index: Util.xs === RCS file: /home/cvs/modperl/src/modules/perl/Util.xs,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- Util.xs 4 Mar 2000 20:55:47 - 1.9 +++ Util.xs 24 Mar 2002 21:57:53 - 1.10 @@ -32,6 +32,9 @@ return sv; } +#define IS_HIGHBIT_CHAR(b) \ + ( (((unsigned char)(b)) 126) (((unsigned char)(b)) = 255) ) + static SV *my_escape_html(char *s) { int i, j; @@ -43,7 +46,7 @@ j += 3; else if (s[i] == '') j += 4; -else if (s[i] == '') +else if (s[i] == '' || IS_HIGHBIT_CHAR(s[i])) j += 5; if (j == 0) @@ -67,6 +70,10 @@ memcpy(SvPVX(x)[j], quot;, 6); j += 5; } +else if (IS_HIGHBIT_CHAR(s[i])) { +sprintf(SvPVX(x)[j], #%d;, (unsigned char)s[i]); +j += 5; +} else SvPVX(x)[j] = s[i]; 1.12 +5 -1 modperl/t/net/perl/util.pl Index: util.pl === RCS file: /home/cvs/modperl/t/net/perl/util.pl,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- util.pl 6 Mar 2000 07:50:54 - 1.11 +++ util.pl 24 Mar 2002 21:57:53 - 1.12 @@ -2,7 +2,7 @@ use Apache::test; $|++; my $i = 0; -my $tests = 7; +my $tests = 8; my $r = shift; $r-send_http_header('text/plain'); @@ -61,6 +61,10 @@ /body /html EOF + +#XXX: this test could be more robust, but its better than nothing +my $c = Apache::Util::escape_html(\x8b); +test ++$i, $c =~ /^\#\d{3,3}\;$/; my $txt = No html tags in here at all; my $etxt = Apache::Util::escape_html($txt);