[Perlunit-devel] Re: graphing method calls

Adam Spiers Wed, 07 Mar 2001 13:02:16 -0800

Adam Spiers ([EMAIL PROTECTED]) wrote:
> I just spent this afternoon and this evening knocking this up (see
> attached PS file).  It's nowhere near complete and probably buggy, but
> you get the idea.  Could be a useful tool?  (Yes, it will work for any
> Perl, not just Test::Unit.)

OK, attached is the generator (a lot better but still unfinished and
ugly in places), a target class guesser (sorry, no docs yet; read the
code to understand), and sample output: a much improved diagram
showing which methods called which, in Test::Unit::*.  Usage is:

  $ perl -MO=Xref,-raw foo.pm > foo.Xrefs
  $ Xref2dot.pl [ options, see source :-] foo.Xrefs > foo.dot
  $ dot foo.dot -Tps -o foo.ps
  $ gv foo.ps

You can use epssplit or similar to split the diagram over several
pages if you want to print it.

I plan to release this properly at some point; I think it could be a
useful tool.  Why did I write it?  I was trying to figure out why
unwanted stacktraces had crept back in, so was hacking around and got
horribly confused by exception handling and the way TestCase::run()
calls TestResult::run() which calls TestCase::run_bare().  I wished I
had an easier way to visualise the flow control, and then I discovered
B::Xref.  Several hours of intense hacking later ...

I was pretty pleased when the run()->run()->run_bare() circular loop
stuck out like a sore thumb on an otherwise pretty nicely structured
graph.  See the attached output to see what I mean.  This smelt like
slightly bad design to me (and to my colleague, who knows much more
about OO design than me).  Would you agree?  Can it be got rid of?

#!/usr/bin/perl -w
#
# Filter to take the output from B::Xref and convert it into a .dot
# file (see http://www.research.att.com/sw/tools/graphviz/)
#
# Developed as part of the http://www.guideguide.com/ project.
# If you like hacking Perl in a cool environment, come and work for us!
#
# Copyright (c) 2001 Adam Spiers <[EMAIL PROTECTED]>. All rights
# reserved. This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#
# Best viewed via M-x folding-mode in emacs.
#

use strict;

use Data::Dumper;
use Getopt::Long;

# {{{ command-line parsing

my %opts = (
            debug      => 0,
            'code-dir' => '.',
            exclude    => "../CPAN-modules.txt",
           );
GetOptions(\%opts, 'debug|d=i', 'code-dir|c=s', 'guesser|g=s',
                   'include|i=s', 'include-regexp|r=s',
                   'exclude|e=s');

my $input = $ARGV[0];

# }}}
# {{{ regexps

my $class_re = '\b[A-Z]\w+(::\w+)*';
my $obj_re   = '\$\w+';
my $obj_or_class_re = "($class_re|$obj_re)";
my $INC_re = '^(' . join('|', map(quotemeta($_), @INC)) . ')/';

# }}}
# {{{ init

my %include_modules = map { $_ => 1 } get_included_modules();
my %exclude_modules = map { $_ => 1 } get_excluded_modules();

# globals are horrid but I'm lazy
my %method2pkgs = ();
my %pkg2methods = ();
my %pkg2calls   = ();
my %target_pkgs = ();
my %all_pkgs    = ();
my %method2node = ();

my $user_guesser = user_guesser();

# }}}

parse_xrefs($input);

debug(3, "---");

output_preamble();
output_boxes();
output_arrows(guess_all_target_classes());
output_postamble();

# {{{ output

sub output_preamble {
  print <<EOF;
digraph use {
//page        = "8.5,11.5";
  rankdir     = "LR";
  ranksep     = "1";
  nodesep     = "0.4";
  ratio       = "compress";
//concentrate = "true";
  labeldistance = 3;

EOF
}

sub output_boxes {
  my $subgraph_num = 0;
  my $node_id = 0;
  while (my ($pkg, $subs) = each %all_pkgs) {
    $subgraph_num++;

    print <<EOF;
  subgraph cluster_$subgraph_num {
    label = "$pkg";
    style = "filled";
    color = "aquamarine";
    edge [color = "black", labeldistance = 4];
//    node [style = "filled", color = "white"];

EOF
    foreach my $sub (keys %$subs) {
      $method2node{"$pkg\::$sub"} = ++$node_id;
      print qq{    node_$node_id [label="$sub"];\n};
    }
    
    print "  }\n\n";
  }
}

sub output_arrows {
  my $arrows = shift;

  my %seen_it = ();

  foreach my $arrow (@$arrows) {
    my ($pkg1, $sub1, $pkg2, $sub2) = @$arrow;
    my $from = "$pkg1\::$sub1";
    my $to = "$pkg2\::$sub2";
    my $src_node = $method2node{$from};
    my $dst_node = $method2node{$to};
    next unless $src_node && $dst_node;
    next if $seen_it{"$src_node // $dst_node"}++;
    
    print "  node_$src_node -> node_$dst_node;\n";
  }
}

sub output_postamble {
  print "}\n";
}

# }}}
# {{{ parse_xrefs

sub parse_xrefs {
  my ($in_file) = @_;

  my ($cur_file, $cur_src_subr, $cur_subr, $cur_pkg)
   = ('', '', '', '', '', '');

  open(XREFS, $in_file) or die "open($in_file): $!\n";
  while (<XREFS>) {
    chomp;

    my ($file, $src_subr, $line, $pkg, $symbol_type, $symbol, $use) = split;

    next if $file =~ m!/Exporter\.pm$!;  # Exporter is weird
    next if $file =~ m!/constant\.pm$!;  # ignore constants

    if ($file ne $cur_file) {
      $cur_file = $file;
      debug(3, "File: $cur_file (Xref line $.)");
    }

    next unless include_file($cur_file);

    if ($src_subr eq '(definitions)') {
      if ($pkg ne $cur_pkg) {
        $cur_pkg = $pkg;
        debug(3, "   Package: $cur_pkg");
      }

      next unless include_pkg($cur_pkg);

      if ($symbol_type eq '&' && $use eq 'subdef') {
        debug(3, "      $symbol() defined");
        push @{ $pkg2methods{$pkg}    }, $symbol;
        push @{ $method2pkgs{$symbol} }, $pkg;
        $all_pkgs{$cur_pkg}{$symbol}++;
      }

      next;
    }
    
    if ($src_subr ne $cur_src_subr) {
      $cur_src_subr = $src_subr;

      if ($src_subr =~ /::/) {
        ($cur_pkg, $cur_subr) = $src_subr =~ /(.*)::(.*)/;
        debug(3, "      Subroutine: $cur_pkg :: $cur_subr");
      }
      elsif ($src_subr eq '(main)') {
        $cur_subr = $src_subr;
        $cur_pkg = 'main';
        debug(3, "      In $src_subr");
      }
    }

    next unless include_pkg($cur_pkg);

    if ($pkg eq '(method)') {
      # in method calls section
      if ($symbol_type eq '->' && $use eq 'subused') {
        my $method_name = $symbol;

        # We're not interested in UNIVERSAL methods
        next if $method_name =~ /^can|isa|VERSION$/;

        # got a method call
        debug(3, "         $method_name() called on line $line");

        if ($method_name =~ /^\w+(::\w+)*$/) {
          push @{ $pkg2calls{$cur_pkg}{$cur_subr}{$method_name}
                    ->{$cur_file} }, $line;
          $all_pkgs{$cur_pkg}{$cur_subr}++;
        }
        else {
          debug(1, "! Skipping strange method name: $method_name");
        }
        next;
      }
    }
  }
  close(XREFS);

  debug(7, Dumper \%method2pkgs);
  debug(7, Dumper \%pkg2methods);
  debug(7, Dumper \%pkg2calls);
}

# }}}
# {{{ guessing

sub user_guesser {
  return sub { } unless $opts{guesser};

  open(GUESSER, $opts{guesser}) or die "open($opts{guesser}): $!\n";
  my $code = join '', map("  $_", <GUESSER>);
  close(GUESSER);

  $code = <<EOCODE;
sub {
  my (\$filename, \$pkg, \$sub, \$line, \$method_name, \$prefix_guess) = \@_;
  
$code
}
EOCODE
  my $coderef = eval $code;
  die "Guesser code failed compilation: $@\n\nCode was:\n$code" if $@;

  return $coderef;
}

sub guess_all_target_classes {
  my @arrows = ();

  while (my ($pkg, $subs) = each %pkg2calls) {
    debug(2, "pkg $pkg");
    while (my ($sub, $method_names) = each %$subs) {
      debug(2, "  sub $sub");
      while (my ($method_name, $files) = each %$method_names) {
        debug(2, "    method $method_name()");
        while (my ($file, $lines) = each %$files) {
          debug(2, "      file $file");
          foreach my $line_num (@$lines) {
            debug(2, "        line number $line_num");

            my $target_class =
              analyse_method_call($file, $pkg, $sub, $method_name, $line_num);

            next unless $target_class;

            push @arrows, [ $pkg, $sub, $target_class, $method_name ];
            $target_pkgs{$target_class}++;
            $all_pkgs{$target_class}{$method_name}++;
          }
        }
      }
    }
  }

  return \@arrows;
}

sub analyse_method_call {
  my ($file, $pkg, $sub, $method_name, $line_num) = @_;

  my $guess = get_method_call_class_prefix($file, $line_num, $method_name);
  if ($guess) {
    my ($offset, $prefix_guess) = @$guess;

    debug(3, "        matched with line fuzz $offset") if $offset;
    my $actual_line_num = $line_num + $offset;
    my $line = get_src_line($file, $actual_line_num);
    chomp $line;

    my $debugging = <<EODEBUG;
        actual line number: $actual_line_num
        line: $line;
        prefix guess: $prefix_guess
EODEBUG
    chomp $debugging;
    debug(2, $debugging);
    my $target_guess =
      guess_target_class($file, $pkg, $sub, $line_num,
                         $method_name, $prefix_guess);

    my ($class_guess, $text);
    if ($target_guess) {
      my $guess_reason;
      ($guess_reason, $class_guess) = @$target_guess;
      $text = "$class_guess ($guess_reason)";
    }
    else {
      $class_guess = $text = 'UNKNOWN CLASS';
    }

    debug(2, <<EODEBUG);
        class guess: $text
EODEBUG

    return $class_guess;
  }

  return undef;
}

sub guess_target_class {
  my ($filename, $pkg, $sub, $line, $method_name, $prefix_guess) = @_;

  # Start with the easy ones

  # If there's only one method of the given name in all known classes,
  # we know for sure which class it belongs to.

  if ($method2pkgs{$method_name} &&
      @{ $method2pkgs{$method_name} } == 1) {
    return [ 'unique method name' => $method2pkgs{$method_name}[0] ];
  }

  if ($prefix_guess =~ /^$class_re$/) {
    return [ 'prefix was a classname' => $prefix_guess ];
  }

  if ($prefix_guess eq '$self') {
    return [ '$prefix was $self' => $pkg ];
  }

  return undef if $method_name =~ /^SUPER::/; # can do better?

  # from now on, things get more dodgy, so we let the user at it
  my $user_guess =
    $user_guesser->($filename, $pkg, $sub, $line, $method_name, $prefix_guess);

  if ($user_guess) {
    return $user_guess;
  }

  return undef;
}

sub get_method_call_class_prefix {
  my ($filename, $line_num, $method_name) = @_;

  # FIXME: What about more than one call per line?
  # FIXME: This is poor guesswork.  We just do the best we can (be arsed to),
  #        but no doubt, there's room for improvement.

  my $line = get_src_line($filename, $line_num);

  # YUK! *Damn* those inaccurate line numbers ...
  my $max_fuzz_ahead  = 100;
  my $max_fuzz_behind = 100;

  my $fuzz = 0;
  while (! match_method_call_class_prefix($line, $method_name) &&
         ($fuzz < $max_fuzz_ahead))
  {
    $line = get_src_line($filename, $line_num + ++$fuzz);
    chomp $line;
    last if subr_boundary($line);
    debug(5, "examining at fuzz $fuzz: $line");
  }

  if (my $match = match_method_call_class_prefix($line, $method_name)) {
    return [ $fuzz, $match ];
  }

  # Look-ahead failed, try look-behind
  $fuzz = 0;
  $line = get_src_line($filename, $line_num);
  while (! match_method_call_class_prefix($line, $method_name) &&
         ($fuzz > -$max_fuzz_behind))
  {
    $line = get_src_line($filename, $line_num + --$fuzz);
    chomp $line;
    last if subr_boundary($line);
    debug(5, "examining at fuzz $fuzz: $line");
  }

  if (my $match = match_method_call_class_prefix($line, $method_name)) {
    return [ $fuzz, $match ];
  }

  $line = get_src_line($filename, $line_num);
  my $context = get_src_lines_with_context($filename, $line_num);
  my $warning = <<EOWARN;
! Method call to $method_name() not found in $filename line $line_num.
! Context was:
$context
EOWARN
  chomp $warning;
  debug(1, $warning);

  return undef;
}

# }}}
# {{{ code parsing

sub subr_boundary {
  my $line = shift;

  return 1 if $line =~ /^\s*sub\s+\w+\s*{/;
  return 1 if $line eq '__INVALID_LINE__';
  return 1 if $line =~ /__END__/;

  return 0;
}

sub match_method_call_class_prefix {
  my ($line, $method_name) = @_;

  if ($line =~ /$obj_or_class_re->$method_name/) {
    return $1;
  }
  elsif ($line =~ /\b$method_name\s+$obj_or_class_re\b/) {
    return $1;
  }

  return;
}

# }}}
# {{{ reading src

{
  my %src = ();

  sub check_src_file {
    my $filename = shift;
    if ($filename !~ m!^/!) {
      die "Couldn't resolve relative filename $filename; use --code-dir option.\n"
        unless $opts{'code-dir'};
      $filename = $opts{'code-dir'} . "/$filename";
    }
    read_src_file($filename) unless $src{$filename};
    return $filename;
  }

  sub get_src_line {
    my ($filename, $line) = @_;
    $filename = check_src_file($filename);
    return $src{$filename}[$line - 1] || '__INVALID_LINE__';
  }

  sub get_src_lines_with_context {
    my ($filename, $origin_num) = @_;
    $filename = check_src_file($filename);

    my @lines = ();
    foreach my $offset (-5 .. 50) {
      my $line_num = $origin_num + $offset;
      next if $line_num < 1 || $line_num > @{ $src{$filename} };
      my $line = get_src_line($filename, $line_num);
      chomp $line;
      next if $line eq '__INVALID_LINE__';
      last if $offset > 0 && subr_boundary($line);
      push @lines, sprintf "!%5d $line\n", $line_num;
    }

    return join '', @lines;
  }

  sub read_src_file {
    my $filename = shift;
    open(SRC, $filename) or die "open($filename): $!";
    $src{$filename} = [ <SRC> ];
    close(SRC);
  }
}

# }}}
# {{{ include/exclude

sub include_file {
  my $file = shift;

  # This relies on @INC being the same as when the .Xrefs input file was made
  $file =~ s/$INC_re//o;
  $file =~ s/\.pm$//;
  $file =~ s!/!::!g;
  # should now have a package name

  return include_pkg($file);
}

sub include_pkg {
  my $pkg = shift;

  if ($pkg !~ /^\w+(::\w+)*$/) {
    debug(8, "      * $pkg not a valid package");
    return 0;
  }

  if ($opts{'include-regexp'} && $pkg !~ /$opts{'include-regexp'}/) {
    debug(6, "      * $pkg didn't match include regexp");
    return 0;
  }

  if ($opts{include} && ! $include_modules{$pkg}) {
    debug(6, "      * $pkg not on include list");
    return 0;
  }

  if ($opts{exclude} && $exclude_modules{$pkg}) {
    debug(6, "      * $pkg on exclude list");
    return 0;
  }

  return 1;
}

sub get_included_modules {
  return () unless $opts{include};
  return get_lines($opts{include});
}

sub get_excluded_modules {
  return () unless $opts{exclude};
  return get_lines($opts{exclude});
}

# }}}
# {{{ utils

sub get_lines {
  my ($file) = @_;

  my @lines = ();

  open(FILE, $file) or die "open($file): $!\n";
  while (<FILE>) {
    chomp;
    push @lines, $_ if $_;
  }
  close(FILE);

  return @lines;
}

sub debug {
  my ($level, @msgs) = @_;

  warn @msgs, "\n" if $level <= $opts{debug};
}

# }}}

if ($prefix_guess eq '$result') {
  return [ '$result' => 'Test::Unit::TestResult' ];
}

if ($prefix_guess eq '$inner_class_name') {
  return [ '$result' => 'Test::Unit::InnerClass' ];
}

if ($prefix_guess eq '$suite') {
  return [ '$result' => 'Test::Unit::TestSuite' ];
}

if ($prefix_guess eq '$test_case') {
  return [ '$result' => 'Test::Unit::TestCase' ];
}

if ($prefix_guess eq '$runner') {
  return [ '$runner' => 'Test::Unit::Runner' ];
}

if ($prefix_guess eq '$e' && $pkg =~ /TestSuite/) {
  return [ foo => $pkg ];
}

if ($prefix_guess eq '$e' && $pkg =~ /TestRunner/) {
  return [ foo => 'Test::Unit::TestFailure' ]
    if $sub =~ /^print_(errors|failures)$/;
}

if ($prefix_guess eq '$e' && $pkg =~ /TestResult/) {
  return [ foo => 'Test::Unit::TestListener' ];
}

if ($prefix_guess eq '$test' && $pkg =~ /TestSuite/) {
  return [ foo => $pkg ];
}

Test-Unit.ps

[Perlunit-devel] Re: graphing method calls

Reply via email to