Patch: Common opcode_table parsing, take 2
Here's an updated version of my original patch, to account for recent changes in CVS. As before, this includes opcode-munging to let Parrot run on FreeBSD. - Damien diff -u --new-file -r parrot.orig/Parrot/Opcode.pm parrot/Parrot/Opcode.pm --- parrot.orig/Parrot/Opcode.pmWed Dec 31 16:00:00 1969 +++ parrot/Parrot/Opcode.pm Mon Sep 10 23:52:35 2001 @@ -0,0 +1,86 @@ +package Parrot::Opcode; + +use strict; +use Symbol; + +sub read_ops { +my $file = @_ ? shift : "opcode_table"; + +my $fh = gensym; +open $fh, $file or die "$file: $!\n"; + +my %opcode; +my $count = 1; +while (<$fh>) { + s/#.*//; + s/^\s+//; + chomp; + next unless $_; + + my($name, @params) = split /\s+/; + if (@params && $params[0] =~ /^\d+$/) { + my $count = shift @params; + die "$file, line $.: opcode $name parameters don't match count\n" + if ($count != @params); + } + + warn "$file, line $.: opcode $name redefined\n" if $opcode{$name}; + + $opcode{$name}{ARGS} = @params; + $opcode{$name}{TYPES} = \@params; + $opcode{$name}{CODE} = ($name eq "end") ? 0 : $count++; + $opcode{$name}{FUNC} = "Parrot_op_$name"; + + my $num_i = () = grep {/i/} @params; + my $num_n = () = grep {/n/} @params; + $opcode{$name}{RETURN_OFFSET} = 1 + $num_i + $num_n * 2; +} + +return %opcode; +} + +1; + + +__END__ + +=head1 NAME + +Parrot::Opcode - Read opcode definitions + +=head1 SYNOPSIS + + use Parrot::Opcode; + + %opcodes = Parrot::Opcode::read_ops(); + +=head1 DESCRIPTION + +The read_ops() function parses the Parrot opcode_table file, and +returns the contents as a hash. The hash key is the opcode name; +values are hashrefs containing the following fields: + +=over + +=item CODE + +The opcode number. + +=item ARGS + +The opcode argument count. + +=item TYPES + +The opcode argument types, as an arrayref. + +=item FUNC + +The name of the C function implementing this op. + +=back + +read_ops() takes an optional argument: the file to read the opcode table +from. + +=cut diff -u --new-file -r parrot.orig/assemble.pl parrot/assemble.pl --- parrot.orig/assemble.pl Thu Sep 13 20:45:05 2001 +++ parrot/assemble.pl Thu Sep 13 20:33:36 2001 @@ -5,6 +5,7 @@ # Brian Wheeler ([EMAIL PROTECTED]) use strict; +use Parrot::Opcode; my $opt_c; if (@ARGV and $ARGV[0] eq "-c") { @@ -25,32 +26,10 @@ foreach (keys(%real_type)) { $sizeof{$_}=length(pack($pack_type{$real_type{$_}},0)); } - -# get opcodes from guts. -open GUTS, "interp_guts.h"; -my %opcodes; -while () { -next unless /\tx\[(\d+)\] = ([a-z_]+);/; -$opcodes{$2}{CODE} = $1; -} -close GUTS; -# get opcodes and their arg lists -open OPCODES, ") { -next if /^\s*#/; -chomp; -s/^\s+//; -next unless $_; -my ($name, $args, @types) = split /\s+/, $_; -my @rtypes=@types; -@types=map { $_ = $real_type{$_}} @types; -$opcodes{$name}{ARGS} = $args; -$opcodes{$name}{TYPES} = [@types]; -$opcodes{$name}{RTYPES}=[@rtypes]; -} -close OPCODES; +# get opcodes +my %opcodes = Parrot::Opcode::read_ops(); # read source and assemble @@ -134,8 +113,8 @@ $pc+=4; foreach (0..$#args) { -my($rtype)=$opcodes{$opcode}{RTYPES}[$_]; -my($type)=$opcodes{$opcode}{TYPES}[$_]; +my($rtype)=$opcodes{$opcode}{TYPES}[$_]; +my($type)=$real_type{$opcodes{$opcode}{TYPES}[$_]}; if($rtype eq "I" || $rtype eq "N" || $rtype eq "P" || $rtype eq "S") { # its a register argument $args[$_]=~s/^[INPS](\d+)$/$1/i; diff -u --new-file -r parrot.orig/build_interp_starter.pl parrot/build_interp_starter.pl --- parrot.orig/build_interp_starter.pl Thu Sep 13 20:45:05 2001 +++ parrot/build_interp_starter.pl Thu Sep 13 20:36:14 2001 @@ -1,10 +1,9 @@ # !/usr/bin/perl -w use strict; +use Parrot::Opcode; open INTERP, "> interp_guts.h" or die "Can't open interp_guts.h, $!/$^E"; -open OPCODES, "opcode_table" or die "Can't open opcode_table, $!/$^E"; - print INTERP <) { -chomp; -s/#.*$//; -s/^\s+//; -next unless $_; -my($name) = split /\s+/; -my $num = $count; -$num = 0 if $name eq 'end'; -print INTERP "\tx[$num] = $name; \\\n"; -$count++ unless $name eq 'end'; +my %opcodes = Parrot::Opcode::read_ops(); +for my $name (sort {$opcodes{$a}{CODE} <=> $opcodes{$b}{CODE}} keys %opcodes) { +print INTERP "\tx[$opcodes{$name}{CODE}] = $opcodes{$name}{FUNC}; \\\n"; } print INTERP "} while (0);\n"; diff -u --new-file -r parrot.orig/disassemble.pl parrot/disassemble.pl --- parrot.orig/disassemble.pl Thu Sep 13 20:45:05 2001 +++ parrot/disassemble.pl Thu Sep 13 20:37:47 2001 @@ -5,6 +5,7 @@ # Turn a parrot bytecode file into text use strict; +use Parrot::Opcode; my(%opcodes, @opcodes); @@ -25,28 +26,10 @@ s => 4,
Re: Patch: Common opcode_table parsing
At 08:44 AM 9/13/2001 +0100, Simon Cozens wrote: >On Thu, Sep 13, 2001 at 12:47:06AM -0700, Damien Neil wrote: > > I *really* think we need to munge the names, though. "end" is just > > far too common a symbol for us to be able to pollute it. Let's > > learn the lesson from Perl 5: All symbols exported by the Parrot > > code need a prefix. > >Aiiee. Yes, I appreciate what you're saying, but the other lessons >from Perl 5 is that if you want to do that, you end up with either >lots of unweildy code, or a nasty macro renaming. Which is it >gonna be? We're almost never going to be calling opcode functions by name in the core, the core build doesn't care since all that stuff is autogenerated, and nobody had better be calling these functions from extensions, so... I say we munge 'em. Throw a parrot_op_ prefix on them in the preprocessing stage and be done with it. :) Dan --"it's like this"--- Dan Sugalski even samurai [EMAIL PROTECTED] have teddy bears and even teddy bears get drunk
Re: Patch: Common opcode_table parsing
On Thu, Sep 13, 2001 at 08:44:44AM +0100, Simon Cozens wrote: > Aiiee. Yes, I appreciate what you're saying, but the other lessons > from Perl 5 is that if you want to do that, you end up with either > lots of unweildy code, or a nasty macro renaming. Which is it > gonna be? I don't really like the Perl 5 approach of lots of macros. I'd rather have a short prefix attached to all symbols. par_foo() rather than foo() in all cases. For very commonly used macros (the equivalent of PUSHi()), you might relax this rule. Look at the current source: Is it really going to be any harder to always type par_string_length() rather than string_length(), or Par_Allocate_Aligned() rather than Allocate_Aligned()? The symbol names are long enough to begin with that an extra four characters isn't going to make much difference. Even a single character would do a lot to eliminate symbol clashes: Pstring_length() and PAllocate_Aligned(), for example. (Speaking of the above, someone authoritative may want to dictate whether functions are Upper_Case or lower_case.) Talking just about the opcode functions, however: Will code be calling opcode functions directly very often? Perhaps I'm wrong, but I'd think that ops being called outside the runops() loop will be rare. In fact, if ops can be embedded into a switch statement in runops() at compile time, there won't even be any assurance that there ARE any op functions to call. Unwieldy op function names shouldn't be a problem. - Damien
Re: Patch: Common opcode_table parsing
On Thu, Sep 13, 2001 at 12:47:06AM -0700, Damien Neil wrote: > I *really* think we need to munge the names, though. "end" is just > far too common a symbol for us to be able to pollute it. Let's > learn the lesson from Perl 5: All symbols exported by the Parrot > code need a prefix. Aiiee. Yes, I appreciate what you're saying, but the other lessons from Perl 5 is that if you want to do that, you end up with either lots of unweildy code, or a nasty macro renaming. Which is it gonna be? Simon
Re: Patch: Common opcode_table parsing
On Thu, Sep 13, 2001 at 08:25:46AM +0100, Simon Cozens wrote: > On Thu, Sep 13, 2001 at 12:29:18AM -0700, Damien Neil wrote: > > CVS changes over the past couple of days mean this patch will no > > longer cleanly apply. I'd be happy to update it to patch cleanly > > against the current CVS code, but I'd like to know first if the > > approach it takes is on the right track. > > I like it, if only because reduction of common code is always good, > and reduction of common code while everything's in a lot of flux > is even better. OK, I'll go through and update it again. This patch takes out the parsing of interp_guts.h, which I think is good for a variety of reasons. (Summary: it makes things simpler, and I don't think parsing it will buy us anything at this point in time.) Is this OK, or should I put it back in? > Urgh, urgh, urgh. I don't *like* the idea of munging opcode function > names, but I equally don't like coredumps. Isn't there a way of > telling the linker to use our own symbols? Actually, the problem is that the linker IS using our symbols. :> There appears to be an "end" symbol somewhere in libc that is getting munged by the Parrot symbol. I think. I didn't look deeply enough to see exactly how things were going wrong, once I traced the core to a symbol clash. I *really* think we need to munge the names, though. "end" is just far too common a symbol for us to be able to pollute it. Let's learn the lesson from Perl 5: All symbols exported by the Parrot code need a prefix. - Damien
Re: Patch: Common opcode_table parsing
On Thu, Sep 13, 2001 at 12:29:18AM -0700, Damien Neil wrote: > CVS changes over the past couple of days mean this patch will no > longer cleanly apply. I'd be happy to update it to patch cleanly > against the current CVS code, but I'd like to know first if the > approach it takes is on the right track. I like it, if only because reduction of common code is always good, and reduction of common code while everything's in a lot of flux is even better. > Whether this patch makes it in or not, it would be nice if either > the "end" opcode was renamed, or if something was added to munge > the opcode function names--the CVS parrot still cores on FreeBSD > due to a symbol conflict with libc. Urgh, urgh, urgh. I don't *like* the idea of munging opcode function names, but I equally don't like coredumps. Isn't there a way of telling the linker to use our own symbols? Simon
Re: Patch: Common opcode_table parsing
On Tue, Sep 11, 2001 at 12:06:52AM -0700, Damien Neil wrote: > The following patch moves all parsing of opcode_table into a > Parrot::Opcode module. It also removes all parsing of interp_guts.h. > This patch incorporates my earlier patches to prefix all C opcode > functions with "Perl_op_". CVS changes over the past couple of days mean this patch will no longer cleanly apply. I'd be happy to update it to patch cleanly against the current CVS code, but I'd like to know first if the approach it takes is on the right track. Whether this patch makes it in or not, it would be nice if either the "end" opcode was renamed, or if something was added to munge the opcode function names--the CVS parrot still cores on FreeBSD due to a symbol conflict with libc. - Damien
Patch: Common opcode_table parsing
The following patch moves all parsing of opcode_table into a Parrot::Opcode module. It also removes all parsing of interp_guts.h. This patch incorporates my earlier patches to prefix all C opcode functions with "Perl_op_". As best I can tell, everything works the same with the patch as it did before--the assembler and disassembler both generate identical output, and test_prog runs as well as before. (Or better on FreeBSD, where it stops core dumping. :>) - Damien diff -r --new-file -u parrot.orig/Parrot/Opcode.pm parrot/Parrot/Opcode.pm --- parrot.orig/Parrot/Opcode.pmWed Dec 31 16:00:00 1969 +++ parrot/Parrot/Opcode.pm Mon Sep 10 23:52:35 2001 @@ -0,0 +1,86 @@ +package Parrot::Opcode; + +use strict; +use Symbol; + +sub read_ops { +my $file = @_ ? shift : "opcode_table"; + +my $fh = gensym; +open $fh, $file or die "$file: $!\n"; + +my %opcode; +my $count = 1; +while (<$fh>) { + s/#.*//; + s/^\s+//; + chomp; + next unless $_; + + my($name, @params) = split /\s+/; + if (@params && $params[0] =~ /^\d+$/) { + my $count = shift @params; + die "$file, line $.: opcode $name parameters don't match count\n" + if ($count != @params); + } + + warn "$file, line $.: opcode $name redefined\n" if $opcode{$name}; + + $opcode{$name}{ARGS} = @params; + $opcode{$name}{TYPES} = \@params; + $opcode{$name}{CODE} = ($name eq "end") ? 0 : $count++; + $opcode{$name}{FUNC} = "Parrot_op_$name"; + + my $num_i = () = grep {/i/} @params; + my $num_n = () = grep {/n/} @params; + $opcode{$name}{RETURN_OFFSET} = 1 + $num_i + $num_n * 2; +} + +return %opcode; +} + +1; + + +__END__ + +=head1 NAME + +Parrot::Opcode - Read opcode definitions + +=head1 SYNOPSIS + + use Parrot::Opcode; + + %opcodes = Parrot::Opcode::read_ops(); + +=head1 DESCRIPTION + +The read_ops() function parses the Parrot opcode_table file, and +returns the contents as a hash. The hash key is the opcode name; +values are hashrefs containing the following fields: + +=over + +=item CODE + +The opcode number. + +=item ARGS + +The opcode argument count. + +=item TYPES + +The opcode argument types, as an arrayref. + +=item FUNC + +The name of the C function implementing this op. + +=back + +read_ops() takes an optional argument: the file to read the opcode table +from. + +=cut diff -r --new-file -u parrot.orig/assemble.pl parrot/assemble.pl --- parrot.orig/assemble.pl Mon Sep 10 14:26:08 2001 +++ parrot/assemble.pl Mon Sep 10 23:51:34 2001 @@ -3,6 +3,7 @@ # assemble.pl - take a parrot assembly file and spit out a bytecode file use strict; +use Parrot::Opcode; my(%opcodes, %labels); @@ -12,23 +13,7 @@ ); my $sizeof_packi = length(pack($pack_type{i},1024)); -open GUTS, "interp_guts.h"; -my $opcode; -while () { -next unless /\tx\[(\d+)\] = ([a-z_]+);/; -$opcodes{$2}{CODE} = $1; -} - -open OPCODES, ") { -next if /^\s*#/; -chomp; -s/^\s+//; -next unless $_; -my ($name, $args, @types) = split /\s+/, $_; -$opcodes{$name}{ARGS} = $args; -$opcodes{$name}{TYPES} = [@types]; -} +%opcodes = Parrot::Opcode::read_ops(); my $pc = 0; my @code; diff -r --new-file -u parrot.orig/build_interp_starter.pl parrot/build_interp_starter.pl --- parrot.orig/build_interp_starter.pl Mon Sep 10 14:26:09 2001 +++ parrot/build_interp_starter.pl Mon Sep 10 23:53:26 2001 @@ -1,10 +1,9 @@ # !/usr/bin/perl -w use strict; +use Parrot::Opcode; open INTERP, "> interp_guts.h" or die "Can't open interp_guts.h, $!/$^E"; -open OPCODES, "opcode_table" or die "Can't open opcode_table, $!/$^E"; - print INTERP <) { -chomp; -s/#.*$//; -s/^\s+//; -next unless $_; -my($name) = split /\s+/; -my $num = $count; -$num = 0 if $name eq 'end'; -print INTERP "\tx[$num] = $name; \\\n"; -$count++ unless $name eq 'end'; +my %opcodes = Parrot::Opcode::read_ops(); +for my $name (sort {$opcodes{$a}{CODE} <=> $opcodes{$b}{CODE}} keys %opcodes) { +print INTERP "\tx[$opcodes{$name}{CODE}] = $opcodes{$name}{FUNC}; \\\n"; } print INTERP "} while (0);\n"; diff -r --new-file -u parrot.orig/disassemble.pl parrot/disassemble.pl --- parrot.orig/disassemble.pl Mon Sep 10 14:45:33 2001 +++ parrot/disassemble.pl Mon Sep 10 23:57:36 2001 @@ -7,6 +7,7 @@ use strict; my(%opcodes, @opcodes); +use Parrot::Opcode; my %unpack_type; %unpack_type = (i => 'l', @@ -16,28 +17,10 @@ n => 8, ); -open GUTS, "interp_guts.h"; -my $opcode; -while () { -next unless /\tx\[(\d+)\] = ([a-z_]+);/; -$opcodes{$2}{CODE} = $1; -} - -open OPCODES, ") { -next if /^\s*#/; -s/^\s+//; -chomp; -next unless $_; -my ($name, $args, @types) = split /\s+/, $_; -next unless defined $name; -$opcodes{$name}{ARGS} = $args; -$opcodes{$name}{TYPES} = [@types]; -my $code = $opco