Adam Spiers ([EMAIL PROTECTED]) wrote:
> I just spent this afternoon and this evening knocking this up (see
> attached PS file). It's nowhere near complete and probably buggy, but
> you get the idea. Could be a useful tool? (Yes, it will work for any
> Perl, not just Test::Unit.)
OK, attached is the generator (a lot better but still unfinished and
ugly in places), a target class guesser (sorry, no docs yet; read the
code to understand), and sample output: a much improved diagram
showing which methods called which, in Test::Unit::*. Usage is:
$ perl -MO=Xref,-raw foo.pm > foo.Xrefs
$ Xref2dot.pl [ options, see source :-] foo.Xrefs > foo.dot
$ dot foo.dot -Tps -o foo.ps
$ gv foo.ps
You can use epssplit or similar to split the diagram over several
pages if you want to print it.
I plan to release this properly at some point; I think it could be a
useful tool. Why did I write it? I was trying to figure out why
unwanted stacktraces had crept back in, so was hacking around and got
horribly confused by exception handling and the way TestCase::run()
calls TestResult::run() which calls TestCase::run_bare(). I wished I
had an easier way to visualise the flow control, and then I discovered
B::Xref. Several hours of intense hacking later ...
I was pretty pleased when the run()->run()->run_bare() circular loop
stuck out like a sore thumb on an otherwise pretty nicely structured
graph. See the attached output to see what I mean. This smelt like
slightly bad design to me (and to my colleague, who knows much more
about OO design than me). Would you agree? Can it be got rid of?
#!/usr/bin/perl -w
#
# Filter to take the output from B::Xref and convert it into a .dot
# file (see http://www.research.att.com/sw/tools/graphviz/)
#
# Developed as part of the http://www.guideguide.com/ project.
# If you like hacking Perl in a cool environment, come and work for us!
#
# Copyright (c) 2001 Adam Spiers <[EMAIL PROTECTED]>. All rights
# reserved. This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# Best viewed via M-x folding-mode in emacs.
#
use strict;
use Data::Dumper;
use Getopt::Long;
# {{{ command-line parsing
my %opts = (
debug => 0,
'code-dir' => '.',
exclude => "../CPAN-modules.txt",
);
GetOptions(\%opts, 'debug|d=i', 'code-dir|c=s', 'guesser|g=s',
'include|i=s', 'include-regexp|r=s',
'exclude|e=s');
my $input = $ARGV[0];
# }}}
# {{{ regexps
my $class_re = '\b[A-Z]\w+(::\w+)*';
my $obj_re = '\$\w+';
my $obj_or_class_re = "($class_re|$obj_re)";
my $INC_re = '^(' . join('|', map(quotemeta($_), @INC)) . ')/';
# }}}
# {{{ init
my %include_modules = map { $_ => 1 } get_included_modules();
my %exclude_modules = map { $_ => 1 } get_excluded_modules();
# globals are horrid but I'm lazy
my %method2pkgs = ();
my %pkg2methods = ();
my %pkg2calls = ();
my %target_pkgs = ();
my %all_pkgs = ();
my %method2node = ();
my $user_guesser = user_guesser();
# }}}
parse_xrefs($input);
debug(3, "---");
output_preamble();
output_boxes();
output_arrows(guess_all_target_classes());
output_postamble();
# {{{ output
sub output_preamble {
print <<EOF;
digraph use {
//page = "8.5,11.5";
rankdir = "LR";
ranksep = "1";
nodesep = "0.4";
ratio = "compress";
//concentrate = "true";
labeldistance = 3;
EOF
}
sub output_boxes {
my $subgraph_num = 0;
my $node_id = 0;
while (my ($pkg, $subs) = each %all_pkgs) {
$subgraph_num++;
print <<EOF;
subgraph cluster_$subgraph_num {
label = "$pkg";
style = "filled";
color = "aquamarine";
edge [color = "black", labeldistance = 4];
// node [style = "filled", color = "white"];
EOF
foreach my $sub (keys %$subs) {
$method2node{"$pkg\::$sub"} = ++$node_id;
print qq{ node_$node_id [label="$sub"];\n};
}
print " }\n\n";
}
}
sub output_arrows {
my $arrows = shift;
my %seen_it = ();
foreach my $arrow (@$arrows) {
my ($pkg1, $sub1, $pkg2, $sub2) = @$arrow;
my $from = "$pkg1\::$sub1";
my $to = "$pkg2\::$sub2";
my $src_node = $method2node{$from};
my $dst_node = $method2node{$to};
next unless $src_node && $dst_node;
next if $seen_it{"$src_node // $dst_node"}++;
print " node_$src_node -> node_$dst_node;\n";
}
}
sub output_postamble {
print "}\n";
}
# }}}
# {{{ parse_xrefs
sub parse_xrefs {
my ($in_file) = @_;
my ($cur_file, $cur_src_subr, $cur_subr, $cur_pkg)
= ('', '', '', '', '', '');
open(XREFS, $in_file) or die "open($in_file): $!\n";
while (<XREFS>) {
chomp;
my ($file, $src_subr, $line, $pkg, $symbol_type, $symbol, $use) = split;
next if $file =~ m!/Exporter\.pm$!; # Exporter is weird
next if $file =~ m!/constant\.pm$!; # ignore constants
if ($file ne $cur_file) {
$cur_file = $file;
debug(3, "File: $cur_file (Xref line $.)");
}
next unless include_file($cur_file);
if ($src_subr eq '(definitions)') {
if ($pkg ne $cur_pkg) {
$cur_pkg = $pkg;
debug(3, " Package: $cur_pkg");
}
next unless include_pkg($cur_pkg);
if ($symbol_type eq '&' && $use eq 'subdef') {
debug(3, " $symbol() defined");
push @{ $pkg2methods{$pkg} }, $symbol;
push @{ $method2pkgs{$symbol} }, $pkg;
$all_pkgs{$cur_pkg}{$symbol}++;
}
next;
}
if ($src_subr ne $cur_src_subr) {
$cur_src_subr = $src_subr;
if ($src_subr =~ /::/) {
($cur_pkg, $cur_subr) = $src_subr =~ /(.*)::(.*)/;
debug(3, " Subroutine: $cur_pkg :: $cur_subr");
}
elsif ($src_subr eq '(main)') {
$cur_subr = $src_subr;
$cur_pkg = 'main';
debug(3, " In $src_subr");
}
}
next unless include_pkg($cur_pkg);
if ($pkg eq '(method)') {
# in method calls section
if ($symbol_type eq '->' && $use eq 'subused') {
my $method_name = $symbol;
# We're not interested in UNIVERSAL methods
next if $method_name =~ /^can|isa|VERSION$/;
# got a method call
debug(3, " $method_name() called on line $line");
if ($method_name =~ /^\w+(::\w+)*$/) {
push @{ $pkg2calls{$cur_pkg}{$cur_subr}{$method_name}
->{$cur_file} }, $line;
$all_pkgs{$cur_pkg}{$cur_subr}++;
}
else {
debug(1, "! Skipping strange method name: $method_name");
}
next;
}
}
}
close(XREFS);
debug(7, Dumper \%method2pkgs);
debug(7, Dumper \%pkg2methods);
debug(7, Dumper \%pkg2calls);
}
# }}}
# {{{ guessing
sub user_guesser {
return sub { } unless $opts{guesser};
open(GUESSER, $opts{guesser}) or die "open($opts{guesser}): $!\n";
my $code = join '', map(" $_", <GUESSER>);
close(GUESSER);
$code = <<EOCODE;
sub {
my (\$filename, \$pkg, \$sub, \$line, \$method_name, \$prefix_guess) = \@_;
$code
}
EOCODE
my $coderef = eval $code;
die "Guesser code failed compilation: $@\n\nCode was:\n$code" if $@;
return $coderef;
}
sub guess_all_target_classes {
my @arrows = ();
while (my ($pkg, $subs) = each %pkg2calls) {
debug(2, "pkg $pkg");
while (my ($sub, $method_names) = each %$subs) {
debug(2, " sub $sub");
while (my ($method_name, $files) = each %$method_names) {
debug(2, " method $method_name()");
while (my ($file, $lines) = each %$files) {
debug(2, " file $file");
foreach my $line_num (@$lines) {
debug(2, " line number $line_num");
my $target_class =
analyse_method_call($file, $pkg, $sub, $method_name, $line_num);
next unless $target_class;
push @arrows, [ $pkg, $sub, $target_class, $method_name ];
$target_pkgs{$target_class}++;
$all_pkgs{$target_class}{$method_name}++;
}
}
}
}
}
return \@arrows;
}
sub analyse_method_call {
my ($file, $pkg, $sub, $method_name, $line_num) = @_;
my $guess = get_method_call_class_prefix($file, $line_num, $method_name);
if ($guess) {
my ($offset, $prefix_guess) = @$guess;
debug(3, " matched with line fuzz $offset") if $offset;
my $actual_line_num = $line_num + $offset;
my $line = get_src_line($file, $actual_line_num);
chomp $line;
my $debugging = <<EODEBUG;
actual line number: $actual_line_num
line: $line;
prefix guess: $prefix_guess
EODEBUG
chomp $debugging;
debug(2, $debugging);
my $target_guess =
guess_target_class($file, $pkg, $sub, $line_num,
$method_name, $prefix_guess);
my ($class_guess, $text);
if ($target_guess) {
my $guess_reason;
($guess_reason, $class_guess) = @$target_guess;
$text = "$class_guess ($guess_reason)";
}
else {
$class_guess = $text = 'UNKNOWN CLASS';
}
debug(2, <<EODEBUG);
class guess: $text
EODEBUG
return $class_guess;
}
return undef;
}
sub guess_target_class {
my ($filename, $pkg, $sub, $line, $method_name, $prefix_guess) = @_;
# Start with the easy ones
# If there's only one method of the given name in all known classes,
# we know for sure which class it belongs to.
if ($method2pkgs{$method_name} &&
@{ $method2pkgs{$method_name} } == 1) {
return [ 'unique method name' => $method2pkgs{$method_name}[0] ];
}
if ($prefix_guess =~ /^$class_re$/) {
return [ 'prefix was a classname' => $prefix_guess ];
}
if ($prefix_guess eq '$self') {
return [ '$prefix was $self' => $pkg ];
}
return undef if $method_name =~ /^SUPER::/; # can do better?
# from now on, things get more dodgy, so we let the user at it
my $user_guess =
$user_guesser->($filename, $pkg, $sub, $line, $method_name, $prefix_guess);
if ($user_guess) {
return $user_guess;
}
return undef;
}
sub get_method_call_class_prefix {
my ($filename, $line_num, $method_name) = @_;
# FIXME: What about more than one call per line?
# FIXME: This is poor guesswork. We just do the best we can (be arsed to),
# but no doubt, there's room for improvement.
my $line = get_src_line($filename, $line_num);
# YUK! *Damn* those inaccurate line numbers ...
my $max_fuzz_ahead = 100;
my $max_fuzz_behind = 100;
my $fuzz = 0;
while (! match_method_call_class_prefix($line, $method_name) &&
($fuzz < $max_fuzz_ahead))
{
$line = get_src_line($filename, $line_num + ++$fuzz);
chomp $line;
last if subr_boundary($line);
debug(5, "examining at fuzz $fuzz: $line");
}
if (my $match = match_method_call_class_prefix($line, $method_name)) {
return [ $fuzz, $match ];
}
# Look-ahead failed, try look-behind
$fuzz = 0;
$line = get_src_line($filename, $line_num);
while (! match_method_call_class_prefix($line, $method_name) &&
($fuzz > -$max_fuzz_behind))
{
$line = get_src_line($filename, $line_num + --$fuzz);
chomp $line;
last if subr_boundary($line);
debug(5, "examining at fuzz $fuzz: $line");
}
if (my $match = match_method_call_class_prefix($line, $method_name)) {
return [ $fuzz, $match ];
}
$line = get_src_line($filename, $line_num);
my $context = get_src_lines_with_context($filename, $line_num);
my $warning = <<EOWARN;
! Method call to $method_name() not found in $filename line $line_num.
! Context was:
$context
EOWARN
chomp $warning;
debug(1, $warning);
return undef;
}
# }}}
# {{{ code parsing
sub subr_boundary {
my $line = shift;
return 1 if $line =~ /^\s*sub\s+\w+\s*{/;
return 1 if $line eq '__INVALID_LINE__';
return 1 if $line =~ /__END__/;
return 0;
}
sub match_method_call_class_prefix {
my ($line, $method_name) = @_;
if ($line =~ /$obj_or_class_re->$method_name/) {
return $1;
}
elsif ($line =~ /\b$method_name\s+$obj_or_class_re\b/) {
return $1;
}
return;
}
# }}}
# {{{ reading src
{
my %src = ();
sub check_src_file {
my $filename = shift;
if ($filename !~ m!^/!) {
die "Couldn't resolve relative filename $filename; use --code-dir option.\n"
unless $opts{'code-dir'};
$filename = $opts{'code-dir'} . "/$filename";
}
read_src_file($filename) unless $src{$filename};
return $filename;
}
sub get_src_line {
my ($filename, $line) = @_;
$filename = check_src_file($filename);
return $src{$filename}[$line - 1] || '__INVALID_LINE__';
}
sub get_src_lines_with_context {
my ($filename, $origin_num) = @_;
$filename = check_src_file($filename);
my @lines = ();
foreach my $offset (-5 .. 50) {
my $line_num = $origin_num + $offset;
next if $line_num < 1 || $line_num > @{ $src{$filename} };
my $line = get_src_line($filename, $line_num);
chomp $line;
next if $line eq '__INVALID_LINE__';
last if $offset > 0 && subr_boundary($line);
push @lines, sprintf "!%5d $line\n", $line_num;
}
return join '', @lines;
}
sub read_src_file {
my $filename = shift;
open(SRC, $filename) or die "open($filename): $!";
$src{$filename} = [ <SRC> ];
close(SRC);
}
}
# }}}
# {{{ include/exclude
sub include_file {
my $file = shift;
# This relies on @INC being the same as when the .Xrefs input file was made
$file =~ s/$INC_re//o;
$file =~ s/\.pm$//;
$file =~ s!/!::!g;
# should now have a package name
return include_pkg($file);
}
sub include_pkg {
my $pkg = shift;
if ($pkg !~ /^\w+(::\w+)*$/) {
debug(8, " * $pkg not a valid package");
return 0;
}
if ($opts{'include-regexp'} && $pkg !~ /$opts{'include-regexp'}/) {
debug(6, " * $pkg didn't match include regexp");
return 0;
}
if ($opts{include} && ! $include_modules{$pkg}) {
debug(6, " * $pkg not on include list");
return 0;
}
if ($opts{exclude} && $exclude_modules{$pkg}) {
debug(6, " * $pkg on exclude list");
return 0;
}
return 1;
}
sub get_included_modules {
return () unless $opts{include};
return get_lines($opts{include});
}
sub get_excluded_modules {
return () unless $opts{exclude};
return get_lines($opts{exclude});
}
# }}}
# {{{ utils
sub get_lines {
my ($file) = @_;
my @lines = ();
open(FILE, $file) or die "open($file): $!\n";
while (<FILE>) {
chomp;
push @lines, $_ if $_;
}
close(FILE);
return @lines;
}
sub debug {
my ($level, @msgs) = @_;
warn @msgs, "\n" if $level <= $opts{debug};
}
# }}}
if ($prefix_guess eq '$result') {
return [ '$result' => 'Test::Unit::TestResult' ];
}
if ($prefix_guess eq '$inner_class_name') {
return [ '$result' => 'Test::Unit::InnerClass' ];
}
if ($prefix_guess eq '$suite') {
return [ '$result' => 'Test::Unit::TestSuite' ];
}
if ($prefix_guess eq '$test_case') {
return [ '$result' => 'Test::Unit::TestCase' ];
}
if ($prefix_guess eq '$runner') {
return [ '$runner' => 'Test::Unit::Runner' ];
}
if ($prefix_guess eq '$e' && $pkg =~ /TestSuite/) {
return [ foo => $pkg ];
}
if ($prefix_guess eq '$e' && $pkg =~ /TestRunner/) {
return [ foo => 'Test::Unit::TestFailure' ]
if $sub =~ /^print_(errors|failures)$/;
}
if ($prefix_guess eq '$e' && $pkg =~ /TestResult/) {
return [ foo => 'Test::Unit::TestListener' ];
}
if ($prefix_guess eq '$test' && $pkg =~ /TestSuite/) {
return [ foo => $pkg ];
}
Test-Unit.ps