stas        2004/04/13 20:35:39

  Added:       lib/ModPerl CScan.pm
  Log:
  more fixes to Doug's fork of C::Scan is now part of the modperl, so people
  don't have to figure out where to get the fix of the fix version
  
  Revision  Changes    Path
  1.1                  modperl-2.0/lib/ModPerl/CScan.pm
  
  Index: CScan.pm
  ===================================================================
  package ModPerl::CScan;
  
  require Exporter;
  use Config '%Config';
  use File::Basename;
  use Data::Flow qw(0.05);
  use strict;                   # Earlier it catches ISA and EXPORT.
  
  @ModPerl::CScan::ISA = qw(Exporter Data::Flow);
  
  # Items to export into callers namespace by default. Note: do not export
  # names by default without a very good reason. Use EXPORT_OK instead.
  # Do not simply export all your public functions/methods/constants.
  
  @ModPerl::CScan::EXPORT = qw(
            );
  @ModPerl::CScan::EXPORT_OK = qw(
                        );
  # this flag tells cpp to only output macros
  $ModPerl::CScan::MACROS_ONLY = '-dM';
  
  $ModPerl::CScan::VERSION = '0.75';
  
  my (%keywords,%style_keywords);
  for (qw(asm auto break case char continue default do double else enum
          extern float for fortran goto if int long register return short
          sizeof static struct switch typedef union unsigned signed while void 
volatile)) {
    $keywords{$_}++;
  }
  for (qw(bool class const delete friend inline new operator overload private
          protected public virtual)) {
    $style_keywords{'C++'}{$_}++;
  }
  for (qw(__func__ _Complex _Imaginary _Bool inline restrict)) {
    $style_keywords{'C9X'}{$_}++;
  }
  for (qw(inline const asm noreturn section 
        constructor destructor unused weak)) {
    $style_keywords{'GNU'}{$_}++;
    $style_keywords{'GNU'}{"__$ {_}__"}++;
  }
    $style_keywords{'GNU'}{__attribute__}++;
    $style_keywords{'GNU'}{__extension__}++;
    $style_keywords{'GNU'}{__consts}++;
    $style_keywords{'GNU'}{__const}++;
    $style_keywords{'GNU'}{__restrict}++;
  
  my $recipes
    = { Defines => { default => '' },
        cppstdin => { default => $Config{cppstdin} },
        cppflags => { default => $Config{cppflags} },
        cppminus => { default => $Config{cppminus} },
        c_styles => { default => [qw(C++ GNU C9X)] },
        add_cppflags => { default => '' },
        keywords => { prerequisites => ['c_styles'],
                    output => sub {
                      my %kw = %keywords;
                      my %add;
                      for ( @{ shift->{c_styles} } ) {
                        %add = %{ $style_keywords{$_} };
                        %kw = (%kw, %add);
                      }
                      \%kw;
                    }, },
        'undef' => { default => undef },
        filename_filter => { default => undef },
        full_text => { class_filter => [ 'text', 'C::Preprocessed',
                                       qw(undef filename Defines includeDirs Cpp)] },
        text => { class_filter => [ 'text', 'C::Preprocessed',
                                  qw(filename_filter filename Defines includeDirs 
Cpp)] },
        text_only_from => { class_filter => [ 'text_only_from', 'C::Preprocessed',
                                            qw(filename_filter filename Defines 
includeDirs Cpp)] },
        includes => { filter => [ \&includes, 
                                qw(filename Defines includeDirs Cpp) ], },
        includeDirs =>  { prerequisites => ['filedir'], 
                        output => sub {
                          my $data = shift;
                          [ $data->{filedir}, '/usr/local/include', '.'];
                        } },
        Cpp => { prerequisites => [qw(cppminus add_cppflags cppflags cppstdin)], 
               output => sub {
                 my $data = shift;
                 return { cppstdin => $data->{cppstdin},
                          cppflags => "$data->{cppflags} $data->{add_cppflags}",
                          cppminus => $data->{cppminus} };
               } },
        filedir => { output => sub { dirname ( shift->{filename} || '.' ) } },
        sanitized => { filter => [ \&sanitize, 'text'], },
        toplevel => { filter => [ \&top_level, 'sanitized'], },
        full_sanitized => { filter => [ \&sanitize, 'full_text'], },
        full_toplevel => { filter => [ \&top_level, 'full_sanitized'], },
        no_type_decl => { filter => [ \&remove_type_decl, 'toplevel'], },
        typedef_chunks => { filter => [ \&typedef_chunks, 'full_toplevel'], },
        struct_chunks => { filter => [ \&struct_chunks, 'full_toplevel'], },
        typedefs_whited => { filter => [ \&typedefs_whited,
                                       'full_sanitized', 'typedef_chunks',
                                       'keywords_rex'], },
        typedef_texts => { filter => [ \&typedef_texts,
                                     'full_text', 'typedef_chunks'], },
        struct_texts => { filter => [ \&typedef_texts,
                                    'full_text', 'struct_chunks'], },
        typedef_hash => { filter => [ \&typedef_hash,
                                    'typedef_texts', 'typedefs_whited'], },
        typedef_structs => { filter => [ \&typedef_structs,
                                       'typedef_hash', 'struct_texts'], },
        typedefs_maybe => { filter => [ sub {[keys %{+shift}]},
                                      'typedef_hash'], },
        defines_maybe => { filter => [ \&defines_maybe, 'filename'], },
        defines_no_args => { prerequisites => ['defines_maybe'],
                           output => sub { shift->{defines_maybe}->[0] }, },
        defines_args => { prerequisites => ['defines_maybe'],
                        output => sub { shift->{defines_maybe}->[1] }, },
  
        defines_full => { filter => [ \&defines_full, 
                                    qw(filename Defines includeDirs Cpp) ], },
        defines_no_args_full => { prerequisites => ['defines_full'],
                                output => sub { shift->{defines_full}->[0] }, },
        defines_args_full => { prerequisites => ['defines_full'],
                        output => sub { shift->{defines_full}->[1] }, },
  
        decl_inlines => { filter => [ \&functions_in, 'no_type_decl'], },
        inline_chunks => { filter => [ sub { shift->[0] }, 'decl_inlines'], },
        inlines => { filter => [ \&from_chunks, 'inline_chunks', 'text'], },
        decl_chunks => { filter => [ sub { shift->[1] }, 'decl_inlines'], },
        decls => { filter => [ \&from_chunks, 'decl_chunks', 'text'], },
        fdecl_chunks => { filter => [ sub { shift->[4] }, 'decl_inlines'], },
        fdecls => { filter => [ \&from_chunks, 'fdecl_chunks', 'text'], },
        mdecl_chunks => { filter => [ sub { shift->[2] }, 'decl_inlines'], },
        mdecls => { filter => [ \&from_chunks, 'mdecl_chunks', 'text'], },
        vdecl_chunks => { filter => [ sub { shift->[3] }, 'decl_inlines'], },
        vdecls => { filter => [ \&from_chunks, 'vdecl_chunks', 'text'], },
        vdecl_hash => { filter => [ \&vdecl_hash, 'vdecls', 'mdecls' ], },
        parsed_fdecls => { filter => [ \&do_declarations, 'fdecls', 
                                     'typedef_hash', 'keywords'], },
        keywords_rex => { filter => [ sub { my @k = keys %{ shift() };
                                          local $" = '|';
                                          my $r = "(?:@k)";
                                          eval 'qr/$r/' or $r   # Older Perls
                                        }, 'keywords'], },
      };
  
  sub from_chunks {
    my $chunks = shift;
    my $txt = shift;
    my @out;
    my $i = 0;
    while ($i < @$chunks) {
      push @out, substr $txt, $chunks->[$i], $chunks->[ $i + 1 ] - $chunks->[$i];
      $i += 2;
    }
    [EMAIL PROTECTED];
  }
  
  #sub process { request($recipes, @_) }
  # Preloaded methods go here.
  
  sub includes {
    my %seen;
    my $stream = new C::Preprocessed (@_)
      or die "Cannot open pipe from cppstdin: $!\n";
  
    while (<$stream>) {
      next unless m(^\s*\#\s*   # Leading hash
                  (line\s*)?    # 1: Optional line
                  ([0-9]+)\s*   # 2: Line number
                  (.*)          # 3: The rest
                 )x;
      my $include = $3;
      $include = $1 if $include =~ /"(.*)"/; # Filename may be in quotes
      $include =~ s,\\\\,/,g if $^O eq 'os2';
      $seen{$include}++ if $include ne "";
    }
    [keys %seen];
  }
  
  sub defines_maybe {
    my $file = shift;
    my ($mline,$line,%macros,%macrosargs,$sym,$args);
    open(C, $file) or die "Cannot open file $file: $!\n";
    while (not eof(C) and $line = <C>) {
      next unless 
        ( $line =~ s[
                   ^ \s* \# \s* # Start of directive
                   define \s+
                   (\w+)        # 1: symbol
                   (?:
                    \( (.*?) \s* \) # 2: Minimal match for arguments
                                      # in parenths (without trailing
                                      # spaces)
                   )?           # optional, no grouping
                   \s*          # rest is the definition
                   ([\s\S]*)    # 3: the rest
                  ][]x );
      ($sym, $args, $mline) = ($1, $2, $3);
      $mline .= <C> while not eof(C) and $mline =~ s/\\\n/\n/;
      chomp $mline;
      #print "sym: `$sym', args: `$args', mline: `$mline'\n";
      if (defined $args) {
        $macrosargs{$sym} = [ [split /\s*,\s*/, $args], $mline];
      } else {
        $macros{$sym} = $mline;
      }
    }
    close(C) or die "Cannot close file $file: $!\n";
    [\%macros, \%macrosargs];
  }
  
  sub defines_full {
    my $Cpp = $_[3];
    my ($mline,$line,%macros,%macrosargs,$sym,$args);
  
    # save the old cppflags and add the flag for only ouputting macro definitions
    my $old_cppstdin = $Cpp->{'cppstdin'};
    $Cpp->{'cppstdin'} = $old_cppstdin . " " . $ModPerl::CScan::MACROS_ONLY;
  
    my $stream = new C::Preprocessed (@_)
      or die "Cannot open pipe from cppstdin: $!\n";
  
    while (defined ($line = <$stream>)) {
      next unless 
        ( $line =~ s[
                   ^ \s* \# \s* # Start of directive
                   define \s+
                   (\w+)        # 1: symbol
                   (?:
                    \( (.*?) \s* \) # 2: Minimal match for arguments
                                      # in parenths (without trailing
                                      # spaces)
                   )?           # optional, no grouping
                   \s*          # rest is the definition
                   ([\s\S]*)    # 3: the rest
                  ][]x );
      ($sym, $args, $mline) = ($1, $2, $3);
      $mline .= <$stream> while ($mline =~ s/\\\n/\n/);
      chomp $mline;
  #print STDERR "sym: `$sym', args: `$args', mline: `$mline'\n";
      if (defined $args) {
        $macrosargs{$sym} = [ [split /\s*,\s*/, $args], $mline];
      } else {
        $macros{$sym} = $mline;
      }
    }
    # restore the original cppflags
    $Cpp->{'cppstdin'} = $old_cppstdin;
    [\%macros, \%macrosargs];
  }
  
  sub typedef_chunks {          # Input is toplevel, output: starts and ends
    my $txt = shift;
    pos $txt = 0;
    my ($b, $e, @out);
    while ($txt =~ /\btypedef\b/g) {
      push @out, pos $txt;
      $txt =~ /(?=;)|\Z/g;
      push @out, pos $txt;
    }
    [EMAIL PROTECTED];
  }
  
  sub struct_chunks {
    my $txt = shift;
    pos $txt = 0;
    my ($b, $e, @out);
    while ($txt =~ /\b(?=struct\s*(\w*\s*)?\{)/g) {
      push @out, pos $txt;
      $txt =~ /(?=;)|\Z/g;
      push @out, pos $txt;
    }
    [EMAIL PROTECTED];
  }
  
  sub typedefs_whited {         # Input is sanitized text, and list of beg/end.
    my @lst = @{$_[1]};
    my @out;
    my ($b, $e);
    while ($b = shift @lst) {
      $e = shift @lst;
      push @out, whited_decl($_[2], substr $_[0], $b, $e - $b);
    }
    [EMAIL PROTECTED];
  }
  
  sub structs_whited {
    my @lst = @{$_[1]};
    my @out;
    my ($b, $e, $in);
    while ($b = shift @lst) {
      $e = shift @lst;
      $in = substr $_[0], $b, $e - $b;
      $in =~ s/^(struct\s*(\w*\s*)?)(.*)$/$1 . " " x length($3)/es;
      push @out, $in;
    }
    [EMAIL PROTECTED];
  }
  
  sub typedef_texts {
    my ($txt, $chunks) = (shift, shift);
    my ($b, $e, $in, @out);
    my @in = @$chunks;
    while (($b, $e) = splice @in, 0, 2) {
      $in = substr($txt, $b, $e - $b);
      # remove any remaining directives
      $in =~ s/^ ( \s* \# .* ( \\ $ \n .* )* ) / ' ' x length($1)/xgem;
      push @out, $in;
    }
    [EMAIL PROTECTED];
  }
  
  sub typedef_hash {
    my ($typedefs, $whited) = (shift,shift);
    my %out;
  
   loop:
    for my $o (0..$#$typedefs) {
      my $wh = $whited->[$o];
      my $td = $typedefs->[$o];
  #my $verb = $td =~ /apr_child_errfn_t/ ? 1 : 0;
  #warn "$wh || $td\n" if $verb;
      if ($wh =~ /,/ or not $wh =~ /\w/) { # Hard case, guessimates ...
        # Determine whether the new thingies are inside parens
        $wh =~ /,/g;
        my $p = pos $wh;
        my ($s, $e);
        if (matchingbrace($wh)) {       # Inside.  Easy part: just split on /,/...
        $e = pos($wh) - 1;
        $s = $e;
        my $d = 0;
        # Skip back
        while (--$s >= 0) {
          my $c = substr $wh, $s, 1;
          if ($c =~ /[\(\{\[]/) {
            $d--;
          } elsif ($c =~ /[\)\]\}]/) {
            $d++;
          }
          last if $d < 0;
        }
        if ($s < 0) {           # Should not happen
          warn("panic: could not match braces in\n\t$td\nwhited as\n\t$wh\n");
          next loop;
        }
        $s++;
        } else {                        # We are at toplevel
        # We need to skip back all the modifiers attached to the first thingy
        # Guesstimates: everything after the first '*' (inclusive)
        pos $wh = 0;
        $wh = /(?=\w)/g;
        my $ws = pos $wh;
        my $pre = substr $wh, 0, $ws;
        $s = $ws;
        $s = pos $pre if $pre =~ /(?=\*)/g;
        $e = length $wh;
        }
        # Now: need to split $td based on commas in $wh!
        # And need to split each chunk of $td based on word in the chunk of $wh!
        my $td_decls = substr($td, $s, $e - $s);
        my ($pre, $post) = (substr($td, 0, $s), substr($td, $e));
        my $wh_decls = substr($wh, $s, $e - $s);
        my @wh_decls = split /,/, $wh_decls;
        my $td_s = 0;
        my (@td_decl, @td_pre, @td_post, @td_word);
        for my $wh_d (@wh_decls) {
        my $td_d = substr $td, $td_s, length $wh_d;
        push @td_decl, $td_d;
        $wh_d =~ /(\w+)/g;
        push @td_word, $1;
        push @td_post, substr $td_d, pos($wh_d);
        push @td_pre,  substr $td_d, pos($wh_d) - length $1, length $1;
        $td_s += 1 + length $wh_d; # Skip over ','
        }
        for my $i (0..$#wh_decls) {
        my $p = "$td_post[$i]$post";
        $p = '' unless $p =~ /\S/;
        $out{$td_word[$i]} = ["$pre$td_pre[$i]", $p];
        }
      } elsif ($td =~ /\(\s* \*? \s* ([^)]+) \s* \) \s* \(.*\)/gxs){    # XXX: 
function pointer typedef
        $out{$1} = ['XXX: pre_foo', 'XXX: post_bar']; # XXX: not sure what to stuff 
here
        #warn "[$1] [$td]" if $verb;
      } else {                  # Only one thing defined...
        $wh =~ /(\w+)/g;
        my $e   = pos $wh;
        my $s   = $e - length $1;
        my $type        = $1;
        my $pre = substr $td, 0, $s;
        my $post        = substr $td, $e, length($td) - $e;
        $post = '' unless $post =~ /\S/;
        $out{$type} = [$pre, $post];
      }
  
      #die if $verb;
  
    }
    \%out;
  }
  
  sub typedef_structs {
    my($typehash, $structs) = @_;
    my %structs;
    for (0 .. $#$structs) {
      my $in = $structs->[$_];
      my $key;
      next unless $in =~ /^struct\s*(\w+)/;
      next unless $in =~ s{^(struct\s*)(\w+)}{
        $key = "struct $2";
        $1 . " " x length($2)
      }e;
      my $name = parse_struct($in, \%structs);
      $structs{$key} = defined($name) ? $structs{$name} : undef;
    }
    while (my($key, $text) = each %$typehash) {
      my $name = parse_struct($text->[0], \%structs);
      $structs{$key} = defined($name) ? $structs{$name} : undef;
    }
    \%structs;
  }
  
  sub parse_struct {
    my($in, $structs) = @_;
    my($b, $e, $chunk, $vars, $struct, $structname);
    return "$1 $2" if $in =~ /
      ^ \s* (struct | union) \s+ (\w+) \s* $
    /x;
    ($structname, $in) = $in =~ /
      ^ \s* ( (?: struct | union ) (?: \s+ \w+ )? ) \s* { \s* (.*?) \s* } \s* $
    /gisx or return;
    $structname .= " _ANON" unless $structname =~ /\s/;
    $structname .= " 0" if exists $structs->{$structname};
    $structname =~ s/(\d+$)/$1 + 1/e while exists $structs->{$structname};
    $structname =~ s/\s+/ /g;
    $b = 0;
    while ($in =~ /(\{|;|$)/g) {
      matchingbrace($in), next if $1 eq '{';
      $e = pos($in);
      next if $b == $e;
      $chunk = substr($in, $b, $e - $b);
      $b = $e;
      if ($chunk =~ /\G\s*(struct|union).*\}/gs) {
        my $term = pos $chunk;
        my $name = parse_struct(substr($chunk, 0, $term), $structs);
        $vars = parse_vars(join ' ', $name, substr $chunk, $term);
      } else {
        $vars = parse_vars($chunk);
      }
      push @$struct, @{$vars||[]};
    }
    $structs->{$structname} = $struct;
    $structname;
  }
  
  sub parse_vars {
    my $in = shift;
    my($vars, $type, $word, $id, $post, $func);
  
    while ($in =~ /\G\s*([\[;,(]|\*+|:\s*\d+|\S+?\b|$)\s*/gc) {
      $word = $1;
      if ($word eq ';' || $word eq '') {
        next unless defined $id;
        $type = 'int' unless defined $type;     # or is this an error?
        push @$vars, [ $type, $post, $id ];
        ($type, $post, $id, $func) = (undef, undef, undef);
      } elsif ($word eq ',') {
        warn "panic: expecting name before comma in '$in'\n" unless defined $id;
        $type = 'int' unless defined $type;     # or is this an error?
        push @$vars, [ $type, $post, $id ];
        $type =~ s/[ *]*$//;
        $id = undef;
      } elsif ($word eq '[') {
        warn "panic: expecting name before '[' in '$in'\n" unless defined $id;
        $type = 'int' unless defined $type;     # or is this an error?
        my $b = pos $in;
        matchingbrace($in);
        $post .= $word . substr $in, $b, pos($in) - $b;
      } elsif ($word eq '(') {
        # simple hack for function pointers
        $type = join ' ', grep defined, $type, $id if defined $id;
        $type = 'int' unless defined $type;
        if ($in =~ /\G\s*(\*[\s\*]*?)\s*(\w+)[\[\]\d\s]*(\)\s*\()/gc) {
        $type .= "($1";
        $id = $2;
        $post = $3;
        my $b = pos $in;
        matchingbrace($in);
        $post .= substr $in, $b, pos($in) - $b;
        } else {
        warn "panic: can't parse function pointer declaration in '$in'\n";
        return;
        }
      } elsif ($word =~ /^:/) {
        # bitfield
        $type = 'int' unless defined $type;
        $post .= $word;
      } else {
        if (defined $post) {
        if ($func) {
          $post .= $word;
        } else {
          warn "panic: not expecting '$word' after array bounds in '$in'\n";
        }
        } else {
        $type = join ' ', grep defined, $type, $id if defined $id;
        $id = $word;
        }
      }
    }
  unless ($vars) {
    warn sprintf "failed on <%s> with type=<%s>, id=<%s>, post=<%s> at pos=%d\n",
      $in, $type, $id, $post, pos($in);
  }
    $vars;
  }
  
  sub vdecl_hash {
    my($vdecls, $mdecls) = @_;
    my %vdecl_hash;
    for (@$vdecls, @$mdecls) {
      next if /[()]/;   # ignore functions, and function pointers
      my $copy = $_;
      next unless $copy =~ s/^\s*extern\s*//;
      my $vars = parse_vars($copy);
      $vdecl_hash{$_->[2]} = [ @$_[0, 1] ] for @$vars;
    }
    \%vdecl_hash;
  }
  
  # The output is the list of list of inline chunks and list of
  # declaration chunks.
  
  sub functions_in {            # The arg is text without type declarations.
    my $in = shift;             # remove_type_decl(top_level(sanitize($txt)));
    # What remains now consists of variable and function declarations,
    # and inline functions.
    $in =~ /(?=\S)/g;
    my ($b, $e, $b1, $e1, @inlines, @decls, @mdecls, @fdecls, @vdecls);
    $b = pos $in;
    my $chunk;
    while (defined($b) && $b != length $in) {
      $in =~ /;/g or pos $in = $b, $in =~ /.*\S|\Z/g ; # Or last non-space
      $e = pos $in;
      $chunk = substr $in, $b, $e - $b;
      # Now subdivide the chunk.
      # 
      # What we got is one chunk, probably finished by `;'. Whoever, it
      # may start with several inline functions.
      #
      # Note that inline functions contain ( ) { } in the stripped version.
      $b1 = 0;
      while ($chunk =~ /\(\s*\)\s*\{\s*\}/g) {
        $e1 = pos $chunk;
        push @inlines, $b + $b1, $b + $e1;
        $chunk =~ /(?=\S)/g;
        $b1 = pos $chunk; 
        $b1 = length $chunk, last unless defined $b1;
      }
      if ($e - $b - $b1 > 0) {
        my($isvar, $isfunc) = (1, 1);
        substr ($chunk, 0, $b1) = '';
        if ($chunk =~ /,/) {    # Contains multiple declarations.
        push @mdecls, $b + $b1, $e;
        } else  {                       # Non-multiple.
        # Since leading \s* is not optimized, this is quadratic!
        $chunk =~ s{
                     ( ( const | __const
                         | __attribute__ \s* \( \s* \)
                       ) \s* )* ( ; \s* )? \Z # Strip from the end
                   }()x;
        $chunk =~ s/\s*\Z//;
        if ($chunk =~ /\)\Z/) { # Function declaration ends on ")"!
          if ($chunk !~ m{ 
                          \( .* \( # Multiple parenths
                         }x
              and $chunk =~ / \w \s* \( /x) { # Most probably pointer to a function?
            $isvar = 0;
          }
        } elsif ($chunk =~ /
          ^ \s* (enum|struct|union|class) \s+ \w+ \s* $
        /x) {
          $isvar = $isfunc = 0;
        }
        if ($isvar)  {  # Heuristically variable
          push @vdecls, $b + $b1, $e;
        } elsif ($isfunc) {
          push @fdecls, $b + $b1, $e;
        }
        }
        push @decls, $b + $b1, $e if $isvar || $isfunc;
      }
      $in =~ /\G\s*/g ;
      $b = pos $in;
    }
    [EMAIL PROTECTED], [EMAIL PROTECTED], [EMAIL PROTECTED], [EMAIL PROTECTED], [EMAIL 
PROTECTED];
  }
  
  # XXXX This is heuristical in many respects...
  # Recipe: remove all struct-ish chunks.  Remove all array specifiers.
  # Remove GCC attribute specifiers.
  # What remains may contain function's arguments, old types, and newly
  # defined types.
  # Remove function arguments using heuristics methods.
  # Now out of several words in a row the last one is a newly defined type.
  
  sub whited_decl {             # Input is sanitized.
    my $keywords_rex = shift;
    my $in = shift;             # Text of a declaration
  
    #typedef ret_type*(*func) -> typedef ret_type* (*func)
    $in =~ s/\*\(\*/* \(*/;
  
    my $rest  = $in;
    my $out  = $in;             # Whited out $in
  
    # Remove all the structs
    while ($out =~ /(\b(struct|union|class|enum)(\s+\w+)?\s*\{)/g) {
      my $pos_start = pos($out) - length $1;
  
      matchingbrace($out);
      my $pos_end = pos $out;
      substr($out, $pos_start, $pos_end - $pos_start) =
        ' ' x ($pos_end - $pos_start);
      pos $out = $pos_end;
    }
  
    # Deal with glibc's wierd ass __attribute__ tag.  Just dump it.
    # Maaaybe this should check to see if you're using GCC, but I don't
    # think so since glibc is nice enough to do that for you.  [MGS]
    while ( $out =~ m/(\b(__attribute__|attribute)\s*\((?=\s*\())/g ) {
        my $att_pos_start = pos($out) - length($1);
  
        # Need to figure out where ((..)) ends.
        matchingbrace($out);
        my $att_pos_end = pos $out;
  
        # Remove the __attribute__ tag.
        substr($out, $att_pos_start, $att_pos_end - $att_pos_start) =
        ' ' x ($att_pos_end - $att_pos_start);
        pos $out = $att_pos_end;
    }
  
    # Remove arguments of functions (heuristics only).
    # These things (start) arglist of a declared function:
    # paren word comma
    # paren word space non-paren
    # paren keyword paren
    # start a list of arguments. (May be "cdecl *myfunc"?) XXXXX ?????
    while ( $out =~ /(\(\s*(\w+(,|\s*[^\)\s])|$keywords_rex\s*\)))/g ) {
      my $pos_start = pos($out) - length($1);
      pos $out = $pos_start + 1;
      matchingbrace($out);
      substr ($out, $pos_start + 1, pos($out) - 2 - $pos_start)
        = ' ' x (pos($out) - 2 - $pos_start);
    }
    # Remove array specifiers
    $out =~ s/(\[[\w\s\+]*\])/ ' ' x length $1 /ge;
    my $tout = $out;
    # Several words in a row cannot be new typedefs, but the last one.
    $out =~ s/((\w+\**\s+)+(?=[^\s,;\[\{\)]))/ ' ' x length $1 /ge;
    unless ($out =~ /\w/) {
      # Probably a function-type declaration: typedef int f(int);
      # Redo scan leaving the last word of the first group of words:
      $tout =~ /(\w+\s+)*(\w+)\s*\(/g;
      $out = ' ' x (pos($tout) - length $2)
        . $2 . ' ' x (length($tout) - pos($tout));
      # warn "function typedef\n\t'$in'\nwhited-out as\n\t'$out'\n";
    }
    warn "panic: length mismatch\n\t'$in'\nwhited-out as\n\t'$out'\n"
      if length($in) != length $out;
    # Sanity check
    warn "panic: multiple types without intervening comma in\n\t$in\nwhited-out 
as\n\t$out\n"
      if $out =~ /\w[^\w,]+\w/;
    warn "panic: no types found in\n\t$in\nwhited-out as\n\t$out\n"
      unless $out =~ /\w/;
    $out
  }
  
  sub matchingbrace {
    # pos($_[0]) is after the opening brace now
    my $n = 0;
    while ($_[0] =~ /([\{\[\(])|([\]\)\}])/g) {
      $1 ? $n++ : $n-- ;
      return 1 if $n < 0;
    }
    # pos($_[0]) is after the closing brace now
    return;                             # false
  }
  
  sub remove_Comments_no_Strings { # We expect that no strings are around
      my $in = shift;
      $in =~ s,/(/.*|\*[\s\S]*?\*/),,g ; # C and C++
      die "Unfinished comment" if $in =~ m,/\*, ;
      $in;
  }
  
  sub sanitize {                # We expect that no strings are around
      my $in = shift;
      # C and C++, strings and characters
      $in =~ s{ / (
                 / .*                   # C++ style
                 |
                 \* [\s\S]*? \*/        # C style
                )                       # (1)
             | '((?:[^\\\']|\\.)+)'     # (2) Character constants
             | "((?:[^\\\"]|\\.)*)"     # (3) Strings
             | ( ^ \s* \# .*            # (4) Preprocessor
                 ( \\ $ \n .* )* )      # and continuation lines
            } {
              # We want to preserve the length, so that one may go back
              defined $1 ? ' ' x (1 + length $1) :
                defined $4 ? ' ' x length $4 :
                  defined $2 ? "'" . ' ' x length($2) . "'" :
                    defined $3 ? '"' . ' ' x length($3) . '"' : '???'
            }xgem ;
      die "Unfinished comment" if $in =~ m{ /\* }x;
      $in;
  }
  
  sub top_level {                       # We expect argument is sanitized
    # Note that this may remove the variable in declaration: int (*func)();
    my $in = shift;
    my $start;
    my $out = $in;
    while ($in =~ /[\[\{\(]/g ) {
      $start = pos $in;
      matchingbrace($in);
      substr($out, $start, pos($in) - 1 - $start) 
        = ' ' x (pos($in) - 1 - $start);
    }
    $out;
  }
  
  sub remove_type_decl {                # We suppose that the arg is top-level only.
    my $in = shift;
    $in =~ s/(\b__extension__)(\s+typedef\b)/(' ' x length $1) . $2/gse;
    $in =~ s/(\btypedef\b.*?;)/' ' x length $1/gse;
    # The following form may appear only in the declaration of the type itself:
    $in =~ 
      s/(\b(enum|struct|union|class)\b[\s\w]*\{\s*\}\s*;)/' ' x length $1/gse;
    $in;
  }
  
  sub new {
    my $class = shift;
    my $out = SUPER::new $class $recipes;
    $out->set(@_);
    $out;
  }
  
  sub do_declarations {
    my @d = map do_declaration($_, $_[1], $_[2]), @{ $_[0] };
    [EMAIL PROTECTED];
  }
  
  # Forth argument: if defined, there maybe no identifier. Generate one
  # basing on this argument.
  
  sub do_declaration {
    my ($decl, $typedefs, $keywords, $argnum) = @_;
    $decl =~ s/;?\s*$//;
  
    my ($type, $typepre, $typepost, $ident, $args, $w, $pos, $repeater);
    $decl =~ s/[\r\n]\s*/ /g;
  #warn "DECLAR [$decl][$argnum]\n";
    $decl =~ s/^\s*__extension__\b\s*//;
    $decl =~ s/^\s*extern\b\s*//;
    $decl =~ s/^\s*__inline\b\s*//;
    $pos = 0;
    while ($decl =~ /(\w+)/g and ($typedefs->{$1} or $keywords->{$1})) {
      $w = $1;
      if ($w =~ /^(struct|class|enum|union)$/) {
        $decl =~ /\G\s+\w+/g or die "`$w' is not followed by word in `$decl'";
      }
      $pos = pos $decl;
    }
  #warn "pos: $pos\n";
    pos $decl = $pos;
    $decl =~ /\G[\s*]*\*/g or pos $decl = $pos;
    $type = substr $decl, 0, pos $decl;
    $decl =~ /\G\s*/g or pos $decl = length $type; # ????
    $pos = pos $decl;
  #warn "pos: $pos\n";
    if (defined $argnum) {
      if ($decl =~ /\G(\w+)((\s*\[[^][]*\])*)/g) { # The best we can do with [2]
        $ident = $1;
        $repeater = $2;
        $pos = pos $decl;
      } else {
        pos $decl = $pos = length $decl;
        $type = $decl;
        $ident = "arg$argnum";
      }
    } else {
      die "Cannot process declaration `$decl' without an identifier"
        unless $decl =~ /\G(\w+)/g;
      $ident = $1;
      $pos = pos $decl;
    }
  #warn "pos: $pos\n";
    $decl =~ /\G\s*/g or pos $decl = $pos;
    $pos = pos $decl;
  #my $st = length $decl;
  #warn substr($decl, 0, $pos), "\n";
  #warn "pos: $pos $st\n";
  #warn "DECLAR [$decl][$argnum]\n";
    if (pos $decl != length $decl) {
      pos $decl = $pos;
      die "Expecting parenth after identifier in `$decl'\nafter `",
        substr($decl, 0, $pos), "'"
        unless $decl =~ /\G\(/g;
      my $argstring = substr($decl, pos($decl) - length $decl);
      matchingbrace($argstring) or die "Cannot find matching parenth in `$decl'";
      $argstring = substr($argstring, 0, pos($argstring) - 1);
      $argstring =~ s/ ^ ( \s* void )? \s* $ //x;
      $args = [];
      my @args;
      if ($argstring ne '') {
        my $top = top_level $argstring;
        my $p = 0;
        my $arg;
        while ($top =~ /,/g) {
        $arg = substr($argstring, $p, pos($top) - 1 - $p);
        $arg =~ s/^\s+|\s+$//gs;
        push @args, $arg;
        $p = pos $top;
        }
        $arg = substr $argstring, $p;
        $arg =~ s/^\s+|\s+$//gs;
        push @args, $arg;
      }
      my $i = 0;
      for (@args) {
        push @$args, do_declaration1($_, $typedefs, $keywords, $i++);
      }
    }
    [$type, $ident, $args, $decl, $repeater];
  }
  
  sub do_declaration1 {
    my ($decl, $typedefs, $keywords, $argnum) = @_;
    $decl =~ s/;?\s*$//;
  #warn "DECLARO [$decl][$argnum]\n";
    my ($type, $typepre, $typepost, $ident, $args, $w, $pos, $repeater);
    $pos = 0;
    while ($decl =~ /(\w+)/g and ($typedefs->{$1} or $keywords->{$1})) {
      $w = $1;
      if ($w =~ /^(struct|class|enum|union)$/) {
        $decl =~ /\G\s+\w+/g or die "`$w' is not followed by word in `$decl'";
      }
      $pos = pos $decl;
    }
  #warn "POS: $pos\n";
    pos $decl = $pos;
    $decl =~ /\G[\s*]*\*/g or pos $decl = $pos;
    $type = substr $decl, 0, pos $decl;
    $decl =~ /\G\s*/g or pos $decl = length $type; # ????
    $pos = pos $decl;
    if (defined $argnum) {
      if ($decl =~ /\G(\w+)((\s*\[[^][]*\])*)/g) { # The best we can do with [2]
        $ident = $1;
        $repeater = $2;
        $pos = pos $decl;
      } else {
        pos $decl = $pos = length $decl;
        $type = $decl;
        $ident = "arg$argnum";
      }
    } else {
      die "Cannot process declaration `$decl' without an identifier" 
        unless $decl =~ /\G(\w+)/g;
      $ident = $1;
      $pos = pos $decl;
    }
    $decl =~ /\G\s*/g or pos $decl = $pos;
    $pos = pos $decl;
  #warn "DECLAR1 [$decl][$argnum]\n";
  #my $st = length $decl;
  #warn substr($decl, 0, $pos), "\n";
  #warn "pos: $pos $st\n";
    if (pos $decl != length $decl) {
      pos $decl = $pos;
      die "Expecting parenth after identifier in `$decl'\nafter `",
        substr($decl, 0, $pos), "'"
        unless $decl =~ /\G\(/g;
      my $argstring = substr($decl, pos($decl) - length $decl);
      matchingbrace($argstring) or die "Cannot find matching parenth in `$decl'";
      $argstring = substr($argstring, 0, pos($argstring) - 1);
      $argstring =~ s/ ^ ( \s* void )? \s* $ //x;
      $args = [];
      my @args;
      if ($argstring ne '') {
        my $top = top_level $argstring;
        my $p = 0;
        my $arg;
        while ($top =~ /,/g) {
        $arg = substr($argstring, $p, pos($top) - 1 - $p);
        $arg =~ s/^\s+|\s+$//gs;
        push @args, $arg;
        $p = pos $top;
        }
        $arg = substr $argstring, $p;
        $arg =~ s/^\s+|\s+$//gs;
        push @args, $arg;
      }
      my $i = 0;
      for (@args) {
        push @$args, do_declaration1($_, $typedefs, $keywords, $i++);
      }
    }
    [$type, $ident, $args, $decl, $repeater];
  }
  
  ############################################################
  
  package C::Preprocessed;
  use Symbol;
  use File::Basename;
  use Config;
  
  sub new {
      die "usage: C::Preprocessed->new(filename[, defines[, includes[, cpp]]])" 
        if @_ < 2 or @_ > 5;
      my ($class, $filename, $Defines, $Includes, $Cpp) 
        = (shift, shift, shift, shift, shift);
      $Cpp ||= \%Config::Config;
      my $filedir = dirname $filename || '.';
      $Includes ||= [$filedir, '/usr/local/include', '.'];
      my $addincludes = "";
      $addincludes = "-I" . join(" -I", @$Includes)
        if defined $Includes and @$Includes;
      my($sym) = gensym;
      my $cmd = "echo '\#include \"$filename\"' | $Cpp->{cppstdin} $Defines 
$addincludes $Cpp->{cppflags} $Cpp->{cppminus} |";
      #my $cmd = "$Cpp->{cppstdin} $Defines $addincludes $Cpp->{cppflags} 
$Cpp->{cppminus} < $filename |";
      #my $cmd = "echo '\#include <$filename>' | $Cpp->{cppstdin} $Defines 
$addincludes $Cpp->{cppflags} $Cpp->{cppminus} |";
  
      (open($sym, $cmd) or die "Cannot open pipe from `$cmd': $!")
        and bless $sym => $class;
  }
  
  sub text {
    my $class = shift;
    my $filter = shift;
    if (defined $filter) {
      return text_only_from($class, $filter, @_);
    }
    my $stream = $class->new(@_);
    my $oh = select $stream;
    local $/;
    select $oh;
    <$stream>;
  }
  
  sub text_only_from {
    my $class = shift;
    my $from = shift || die "Expecting argument in `text_only_from'";
    my $stream = $class->new(@_);
    my $on = $from eq $_[0];
    my $eqregexp = $on ? '\"\"|' : '';
    my @out;
    while (<$stream>) {
      #print;
  
      $on = /$eqregexp[\"\/]\Q$from\"/ if /^\#/;
      push @out, $_ if $on;
    }
    join '', @out;
  }
  
  sub DESTROY {
    close($_[0]) 
      or die "Cannot close pipe from `$Config::Config{cppstdin}': err $?, $!\n";
  }
  
  # Autoload methods go after __END__, and are processed by the autosplit program.
  # Return to the principal package.
  package ModPerl::CScan;
  
  1;
  __END__
  
  =head1 NAME
  
  ModPerl::CScan - scan C language files for easily recognized constructs.
  
  =head1 SYNOPSIS
  
  =head1 DESCRIPTION
  
  See the C<C::Scan> manpage. This package is just a fork to fix certain
  things that didn't work in the original C<C::Scan>, which is not
  maintained any longer. These fixes required to make it work with the
  Apache 2 source code.
  
  =cut
  
  
  

Reply via email to