-----BEGIN PGP SIGNED MESSAGE----- Moin,
attached is the patch, and a testversion of t/uni/class.t (so that you can toy around with it). The testversion contains two subroutines, one that constructs the qr// objects in the subroutine, and one that takes them as arguments. The latter is slower, so the final patch contains the other variant. I have no idea what's the difference. In any event, the patch changes two more places to use the subroutine, but on my system the timings do not change in a measurable way for these, only the very last loop is affacted by the changes. On my system (2.0 Ghz AMD XP 2400+) the time goes down from about 31 seconds to about 19 seconds after the patch. The entire testsuite takes now 175 (opposed to 181 a few days a ago) seconds: All tests successful. u=2.68 s=0.71 cu=175.30 cs=25.13 scripts=954 tests=107895 Best wishes, Tels - -- Signed on Wed Jun 29 20:37:44 2005 with key 0x93B84C15. Visit my photo gallery at http://bloodgate.com/photos/ PGP key on http://bloodgate.com/tels.asc or per email. "My wife is just a slow brain, running up the bill.." -- Often misheard song lyrics #149 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.2.4 (GNU/Linux) iQEVAwUBQsL0ZHcLPEOTuEwVAQGGZgf/QBKyd1xJm8TfnpequaZ2pvMQ38/ebTAy 0Lp9yGAGvBqhdSNI/eFeRn6BcoQ5Q2Vm0Wg3fIw2B+Eet2E2L5EvATOv8NAcjNnA Iy3uQI0j6nrq6LXXZq7tCb6H1AoopECw/GrPPPJp8JCtna6fEKV0Ap4rCjf+h9JB +e02uCinbKzUKNmQVuCmkFXdMwqhZzFWVKXKagN6X61WIVwWAwie8OgA55QIzqgu 5cJKKZJ4KN3LiO/Biz0HrAkhkoPLuoU7ZxxuC16gqi0rUY+wgGL+nmKpqw6NqtkX s0w0DrTO1jYe8OG1VQhy7qxb6AphbLOPQryDyRQPpDmojBMqQzgFLA== =Mi1U -----END PGP SIGNATURE-----
diff -ruN blead/t/uni/class.t blead.uni_class/t/uni/class.t --- blead/t/uni/class.t 2005-05-28 17:08:44.000000000 +0200 +++ blead.uni_class/t/uni/class.t 2005-06-29 20:35:18.000000000 +0200 @@ -25,6 +25,30 @@ END } +sub test_regexp ($$) { + # test that given string consists of N-1 chars matching $qr1, and 1 + # char matching $qr2 + my ($str, $blk) = @_; + + # constructing these objects here makes the last test loop go much faster + my $qr1 = qr/(\p{$blk}+)/; + if ($str =~ $qr1) { + is ($1, substr($str, 0, -1)); # all except last char + } + else { + is ('first N-1 chars did not match', $blk); + } + + my $qr2 = qr/(\P{$blk}+)/; + if ($str =~ $qr2) { + is ($1, substr($str, -1)); # only last char + } + else { + is ('last char did not match', $blk); + } +} + +use strict; my $str = join "", map chr($_), 0x20 .. 0x6F; @@ -106,8 +130,7 @@ for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) { is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); - is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$y}+)/ && $1, substr($str, -1)); + test_regexp ($str, $y); } } } @@ -127,7 +150,7 @@ my %files; - my $dirname = File::Spec->catdir($updir => lib => unicore => lib => gc_sc); + my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc'); opendir D, $dirname or die $!; @files{readdir(D)} = (); closedir D; @@ -146,8 +169,7 @@ for my $y ($_, $utf8::PA_reverse{$_}) { is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); - is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$y}+)/ && $1, substr($str, -1)); + test_regexp ($str, $y); } } } @@ -160,16 +182,16 @@ ); next unless -e $filename; + + print "# In$_ $filename\n"; + my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); my $blk = $_; - is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1)); - + test_regexp ($str, $blk); $blk =~ s/^In/Block:/; - - is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1)); - is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1)); + test_regexp ($str, $blk); } +
BEGIN { chdir 't' if -d 't'; @INC = qw(../lib .); require "test.pl"; } plan tests => 4670; sub MyUniClass { <<END; 0030 004F END } sub Other::Class { <<END; 0040 005F END } sub A::B::Intersection { <<END; +main::MyUniClass &Other::Class END } sub test_regexp ($$$$) { # test that given string consists of N-1 chars matching $qr1, and 1 # char matching $qr2 my ($str, $qr1, $qr2, $blk) = @_; if ($str =~ $qr1) { is ($1, substr($str, 0, -1)); # all except last char } else { is ('first N-1 chars did not match', $blk); } if ($str =~ $qr2) { is ($1, substr($str, -1)); # only last char } else { is ('last char did not match', $blk); } } sub test_regexp_2 ($$) { # test that given string consists of N-1 chars matching $qr1, and 1 # char matching $qr2 my ($str, $blk) = @_; my $qr1 = qr/(\p{$blk}+)/; my $qr2 = qr/(\P{$blk}+)/; if ($str =~ $qr1) { is ($1, substr($str, 0, -1)); # all except last char } else { is ('first N-1 chars did not match', $blk); } if ($str =~ $qr2) { is ($1, substr($str, -1)); # only last char } else { is ('last char did not match', $blk); } } use strict; my $str = join "", map chr($_), 0x20 .. 0x6F; # make sure it finds built-in class is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'); # make sure it finds user-defined class is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>[EMAIL PROTECTED]'); # make sure it finds class in other package is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_'); # make sure it finds class in other OTHER package is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO'); # all of these should look in lib/unicore/bc/AL.pl $str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}"; is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}"); is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}"); is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}"); is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}"); # make sure InGreek works $str = "[\x{038B}\x{038C}\x{038D}]"; is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}"); use File::Spec; my $updir = File::Spec->updir; # the %utf8::... hashes are already in existence # because utf8_pva.pl was run by utf8_heavy.pl *utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning no warnings 'utf8'; # we do not want warnings about surrogates etc # non-General Category and non-Script while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) { my $prop_name = $utf8::PropertyAlias{$abbrev}; next unless $prop_name; next if $abbrev eq "gc_sc"; for (sort keys %$files) { my $filename = File::Spec->catfile( $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl" ); next unless -e $filename; my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); for my $p ($prop_name, $abbrev) { for my $c ($files->{$_}, $_) { is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1)); is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1)); } } } } # General Category and Script for my $p ('gc', 'sc') { while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) { my $filename = File::Spec->catfile( $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl" ); next unless -e $filename; my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) { for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) { is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); # test_regexp ($str, qr/(\p{$y}+)/, qr/(\P{$y}+)/, $y); test_regexp_2 ($str, $y); } } } } # test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.) SKIP: { skip "Can't reliably derive class names from file names", 592 if $^O eq 'VMS'; # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to # return true. Try to work around this by reading the filenames explicitly # to get a case sensitive test. N.B. This will fail if filename case is # not preserved because you might go looking for a class name of CF or cf # when you really want Cf. Storing case sensitive data in filenames is # simply not portable. my %files; my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc'); opendir D, $dirname or die $!; @files{readdir(D)} = (); closedir D; for (keys %utf8::PA_reverse) { my $leafname = "$utf8::PA_reverse{$_}.pl"; next unless exists $files{$leafname}; my $filename = File::Spec->catfile($dirname, $leafname); my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); for my $x ('gc', 'General Category') { print "# $filename $x $_, $utf8::PA_reverse{$_}\n"; for my $y ($_, $utf8::PA_reverse{$_}) { is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1)); is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1)); test_regexp_2 ($str, $y); } } } } # test the blocks (InFoobar) for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) { my $filename = File::Spec->catfile( $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl" ); next unless -e $filename; print "# In$_ $filename\n"; my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1]; my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1); my $blk = $_; # test_regexp ($str, qr/(\p{$blk}+)/, qr/(\P{$blk}+)/, $blk); test_regexp_2 ($str, $blk); $blk =~ s/^In/Block:/; # test_regexp ($str, qr/(\p{$blk}+)/, qr/(\P{$blk}+)/, $blk); test_regexp_2 ($str, $blk); }