class.t faster

Tels Wed, 29 Jun 2005 12:20:07 -0700

-----BEGIN PGP SIGNED MESSAGE-----

Moin,


attached is the patch, and a testversion of t/uni/class.t (so that you can
toy around with it).

The testversion contains two subroutines, one that constructs the qr//
objects in the subroutine, and one that takes them as arguments. The
latter is slower, so the final patch contains the other variant.

I have no idea what's the difference. In any event, the patch changes two
more places to use the subroutine, but on my system the timings do not
change in a measurable way for these, only the very last loop is affacted
by the changes.

On my system (2.0 Ghz AMD XP 2400+) the time goes down from about 31
seconds to about 19 seconds after the patch. The entire testsuite takes
now 175 (opposed to 181 a few days a ago) seconds:

All tests successful.
u=2.68  s=0.71  cu=175.30  cs=25.13  scripts=954  tests=107895

Best wishes,

Tels

- --
 Signed on Wed Jun 29 20:37:44 2005 with key 0x93B84C15.
 Visit my photo gallery at http://bloodgate.com/photos/
 PGP key on http://bloodgate.com/tels.asc or per email.

 "My wife is just a slow brain, running up the bill.." -- Often misheard
 song lyrics #149



-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.4 (GNU/Linux)

iQEVAwUBQsL0ZHcLPEOTuEwVAQGGZgf/QBKyd1xJm8TfnpequaZ2pvMQ38/ebTAy
0Lp9yGAGvBqhdSNI/eFeRn6BcoQ5Q2Vm0Wg3fIw2B+Eet2E2L5EvATOv8NAcjNnA
Iy3uQI0j6nrq6LXXZq7tCb6H1AoopECw/GrPPPJp8JCtna6fEKV0Ap4rCjf+h9JB
+e02uCinbKzUKNmQVuCmkFXdMwqhZzFWVKXKagN6X61WIVwWAwie8OgA55QIzqgu
5cJKKZJ4KN3LiO/Biz0HrAkhkoPLuoU7ZxxuC16gqi0rUY+wgGL+nmKpqw6NqtkX
s0w0DrTO1jYe8OG1VQhy7qxb6AphbLOPQryDyRQPpDmojBMqQzgFLA==
=Mi1U
-----END PGP SIGNATURE-----

diff -ruN blead/t/uni/class.t blead.uni_class/t/uni/class.t
--- blead/t/uni/class.t 2005-05-28 17:08:44.000000000 +0200
+++ blead.uni_class/t/uni/class.t       2005-06-29 20:35:18.000000000 +0200
@@ -25,6 +25,30 @@
 END
 }
 
+sub test_regexp ($$) {
+  # test that given string consists of N-1 chars matching $qr1, and 1
+  # char matching $qr2
+  my ($str, $blk) = @_;
+
+  # constructing these objects here makes the last test loop go much faster
+  my $qr1 = qr/(\p{$blk}+)/;
+  if ($str =~ $qr1) {
+    is ($1, substr($str, 0, -1));              # all except last char
+  }
+  else {
+    is ('first N-1 chars did not match', $blk);
+  }
+
+  my $qr2 = qr/(\P{$blk}+)/;
+  if ($str =~ $qr2) {
+    is ($1, substr($str, -1));                 # only last char
+  }
+  else {
+    is ('last char did not match', $blk);
+  }
+}
+
+use strict;
 
 my $str = join "", map chr($_), 0x20 .. 0x6F;
 
@@ -106,8 +130,7 @@
       for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, 
$utf8::PVA_abbr_map{gc_sc}{$abbr}) {
         is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
         is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
-        is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1));
-        is($str =~ /(\P{$y}+)/ && $1, substr($str, -1));
+       test_regexp ($str, $y);
       }
     }
   }
@@ -127,7 +150,7 @@
 
   my %files;
 
-  my $dirname = File::Spec->catdir($updir => lib => unicore => lib => gc_sc);
+  my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
   opendir D, $dirname or die $!;
   @files{readdir(D)} = ();
   closedir D;
@@ -146,8 +169,7 @@
       for my $y ($_, $utf8::PA_reverse{$_}) {
        is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
        is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
-       is($str =~ /(\p{$y}+)/ && $1, substr($str, 0, -1));
-       is($str =~ /(\P{$y}+)/ && $1, substr($str, -1));
+       test_regexp ($str, $y);
       }
     }
   }
@@ -160,16 +182,16 @@
   );
 
   next unless -e $filename;
+
+  print "# In$_ $filename\n";
+
   my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
   my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);
 
   my $blk = $_;
 
-  is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1));
-  is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1));
-
+  test_regexp ($str, $blk);
   $blk =~ s/^In/Block:/;
-
-  is($str =~ /(\p{$blk}+)/ && $1, substr($str, 0, -1));
-  is($str =~ /(\P{$blk}+)/ && $1, substr($str, -1));
+  test_regexp ($str, $blk);
 }
+

BEGIN {
    chdir 't' if -d 't';
    @INC = qw(../lib .);
    require "test.pl";
}

plan tests => 4670;

sub MyUniClass {
  <<END;
0030	004F
END
}

sub Other::Class {
  <<END;
0040	005F
END
}

sub A::B::Intersection {
  <<END;
+main::MyUniClass
&Other::Class
END
}

sub test_regexp ($$$$) {
  # test that given string consists of N-1 chars matching $qr1, and 1
  # char matching $qr2
  my ($str, $qr1, $qr2, $blk) = @_;

  if ($str =~ $qr1) {
    is ($1, substr($str, 0, -1));		# all except last char
  }
  else {
    is ('first N-1 chars did not match', $blk);
  }

  if ($str =~ $qr2) {
    is ($1, substr($str, -1));			# only last char
  }
  else {
    is ('last char did not match', $blk);
  }
}

sub test_regexp_2 ($$) {
  # test that given string consists of N-1 chars matching $qr1, and 1
  # char matching $qr2
  my ($str, $blk) = @_;

  my $qr1 = qr/(\p{$blk}+)/;
  my $qr2 = qr/(\P{$blk}+)/;
  if ($str =~ $qr1) {
    is ($1, substr($str, 0, -1));		# all except last char
  }
  else {
    is ('first N-1 chars did not match', $blk);
  }

  if ($str =~ $qr2) {
    is ($1, substr($str, -1));			# only last char
  }
  else {
    is ('last char did not match', $blk);
  }
}

use strict;

my $str = join "", map chr($_), 0x20 .. 0x6F;

# make sure it finds built-in class
is(($str =~ /(\p{Letter}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');
is(($str =~ /(\p{l}+)/)[0], 'ABCDEFGHIJKLMNOPQRSTUVWXYZ');

# make sure it finds user-defined class
is(($str =~ /(\p{MyUniClass}+)/)[0], '0123456789:;<=>[EMAIL PROTECTED]');

# make sure it finds class in other package
is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_');

# make sure it finds class in other OTHER package
is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO');

# all of these should look in lib/unicore/bc/AL.pl
$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}";
is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070E}\x{070F}");
is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070E}\x{070F}");

# make sure InGreek works
$str = "[\x{038B}\x{038C}\x{038D}]";

is(($str =~ /(\p{InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{Script:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{Script=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{sc:InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");
is(($str =~ /(\p{sc=InGreek}+)/)[0], "\x{038B}\x{038C}\x{038D}");

use File::Spec;
my $updir = File::Spec->updir;

# the %utf8::... hashes are already in existence
# because utf8_pva.pl was run by utf8_heavy.pl

*utf8::PropertyAlias = *utf8::PropertyAlias; # thwart a warning

no warnings 'utf8'; # we do not want warnings about surrogates etc

# non-General Category and non-Script
while (my ($abbrev, $files) = each %utf8::PVA_abbr_map) {
  my $prop_name = $utf8::PropertyAlias{$abbrev};
  next unless $prop_name;
  next if $abbrev eq "gc_sc";

  for (sort keys %$files) {
    my $filename = File::Spec->catfile(
      $updir => lib => unicore => lib => $abbrev => "$files->{$_}.pl"
    );

    next unless -e $filename;
    my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
    my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

    for my $p ($prop_name, $abbrev) {
      for my $c ($files->{$_}, $_) {
        is($str =~ /(\p{$p: $c}+)/ && $1, substr($str, 0, -1));
        is($str =~ /(\P{$p= $c}+)/ && $1, substr($str, -1));
      }
    }
  }
}

# General Category and Script
for my $p ('gc', 'sc') {
  while (my ($abbr) = each %{ $utf8::PropValueAlias{$p} }) {
    my $filename = File::Spec->catfile(
      $updir => lib => unicore => lib => gc_sc => "$utf8::PVA_abbr_map{gc_sc}{$abbr}.pl"
    );

    next unless -e $filename;
    my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
    my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

    for my $x ($p, { gc => 'General Category', sc => 'Script' }->{$p}) {
      for my $y ($abbr, $utf8::PropValueAlias{$p}{$abbr}, $utf8::PVA_abbr_map{gc_sc}{$abbr}) {
        is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
        is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
#	test_regexp ($str, qr/(\p{$y}+)/, qr/(\P{$y}+)/, $y);
	test_regexp_2 ($str, $y);
      }
    }
  }
}

# test extra properties (ASCII_Hex_Digit, Bidi_Control, etc.)
SKIP:
{
  skip "Can't reliably derive class names from file names", 592 if $^O eq 'VMS';

  # On case tolerant filesystems, Cf.pl will cause a -e test for cf.pl to
  # return true. Try to work around this by reading the filenames explicitly
  # to get a case sensitive test.  N.B.  This will fail if filename case is
  # not preserved because you might go looking for a class name of CF or cf
  # when you really want Cf.  Storing case sensitive data in filenames is 
  # simply not portable.

  my %files;

  my $dirname = File::Spec->catdir($updir => lib => unicore => lib => 'gc_sc');
  opendir D, $dirname or die $!;
  @files{readdir(D)} = ();
  closedir D;

  for (keys %utf8::PA_reverse) {
    my $leafname = "$utf8::PA_reverse{$_}.pl";
    next unless exists $files{$leafname};

    my $filename = File::Spec->catfile($dirname, $leafname);

    my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
    my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

    for my $x ('gc', 'General Category') {
      print "# $filename $x $_, $utf8::PA_reverse{$_}\n";
      for my $y ($_, $utf8::PA_reverse{$_}) {
	is($str =~ /(\p{$x: $y}+)/ && $1, substr($str, 0, -1));
	is($str =~ /(\P{$x= $y}+)/ && $1, substr($str, -1));
	test_regexp_2 ($str, $y);
      }
    }
  }
}

# test the blocks (InFoobar)
for (grep $utf8::Canonical{$_} =~ /^In/, keys %utf8::Canonical) {
  my $filename = File::Spec->catfile(
    $updir => lib => unicore => lib => gc_sc => "$utf8::Canonical{$_}.pl"
  );

  next unless -e $filename;

  print "# In$_ $filename\n";

  my ($h1, $h2) = map hex, (split(/\t/, (do $filename), 3))[0,1];
  my $str = join "", map chr, $h1 .. (($h2 || $h1) + 1);

  my $blk = $_;

#  test_regexp ($str, qr/(\p{$blk}+)/, qr/(\P{$blk}+)/, $blk);
 test_regexp_2 ($str, $blk);
  $blk =~ s/^In/Block:/;
#  test_regexp ($str, qr/(\p{$blk}+)/, qr/(\P{$blk}+)/, $blk);
 test_regexp_2 ($str, $blk);
}

[PATCH] make t/uni/class.t faster

Reply via email to