Patch: Common opcode_table parsing, take 2

2001-09-13 Thread Damien Neil

Here's an updated version of my original patch, to account for recent
changes in CVS.  As before, this includes opcode-munging to let Parrot
run on FreeBSD.

  - Damien


diff -u --new-file -r parrot.orig/Parrot/Opcode.pm parrot/Parrot/Opcode.pm
--- parrot.orig/Parrot/Opcode.pmWed Dec 31 16:00:00 1969
+++ parrot/Parrot/Opcode.pm Mon Sep 10 23:52:35 2001
@@ -0,0 +1,86 @@
+package Parrot::Opcode;
+
+use strict;
+use Symbol;
+
+sub read_ops {
+my $file = @_ ? shift : "opcode_table";
+
+my $fh = gensym;
+open $fh, $file or die "$file: $!\n";
+
+my %opcode;
+my $count = 1;
+while (<$fh>) {
+   s/#.*//;
+   s/^\s+//;
+   chomp;
+   next unless $_;
+
+   my($name, @params) = split /\s+/;
+   if (@params && $params[0] =~ /^\d+$/) {
+   my $count = shift @params;
+   die "$file, line $.: opcode $name parameters don't match count\n"
+ if ($count != @params);
+   }
+
+   warn "$file, line $.: opcode $name redefined\n" if $opcode{$name};
+
+   $opcode{$name}{ARGS}  = @params;
+   $opcode{$name}{TYPES} = \@params;
+   $opcode{$name}{CODE}  = ($name eq "end") ? 0 : $count++;
+   $opcode{$name}{FUNC}  = "Parrot_op_$name";
+
+   my $num_i = () = grep {/i/} @params;
+   my $num_n = () = grep {/n/} @params;
+   $opcode{$name}{RETURN_OFFSET} = 1 + $num_i + $num_n * 2;
+}
+
+return %opcode;
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+Parrot::Opcode - Read opcode definitions
+
+=head1 SYNOPSIS
+
+  use Parrot::Opcode;
+
+  %opcodes = Parrot::Opcode::read_ops();
+
+=head1 DESCRIPTION
+
+The read_ops() function parses the Parrot opcode_table file, and
+returns the contents as a hash.  The hash key is the opcode name;
+values are hashrefs containing the following fields:
+
+=over
+
+=item CODE
+
+The opcode number.
+
+=item ARGS
+
+The opcode argument count.
+
+=item TYPES
+
+The opcode argument types, as an arrayref.
+
+=item FUNC
+
+The name of the C function implementing this op.
+
+=back
+
+read_ops() takes an optional argument: the file to read the opcode table
+from.
+
+=cut
diff -u --new-file -r parrot.orig/assemble.pl parrot/assemble.pl
--- parrot.orig/assemble.pl Thu Sep 13 20:45:05 2001
+++ parrot/assemble.pl  Thu Sep 13 20:33:36 2001
@@ -5,6 +5,7 @@
 # Brian Wheeler ([EMAIL PROTECTED])
 
 use strict;
+use Parrot::Opcode;
 
 my $opt_c;
 if (@ARGV and $ARGV[0] eq "-c") {
@@ -25,32 +26,10 @@
 foreach (keys(%real_type)) {
 $sizeof{$_}=length(pack($pack_type{$real_type{$_}},0));
 }
-
 
-# get opcodes from guts.
-open GUTS, "interp_guts.h";
-my %opcodes;
-while () {
-next unless /\tx\[(\d+)\] = ([a-z_]+);/;
-$opcodes{$2}{CODE} = $1;
-}
-close GUTS;
 
-# get opcodes and their arg lists
-open OPCODES, ") {
-next if /^\s*#/;
-chomp;
-s/^\s+//;
-next unless $_;
-my ($name, $args, @types) = split /\s+/, $_;
-my @rtypes=@types;
-@types=map { $_ = $real_type{$_}} @types;
-$opcodes{$name}{ARGS} = $args;
-$opcodes{$name}{TYPES} = [@types];
-$opcodes{$name}{RTYPES}=[@rtypes];
-}
-close OPCODES;
+# get opcodes
+my %opcodes = Parrot::Opcode::read_ops();
 
 
 # read source and assemble
@@ -134,8 +113,8 @@
 $pc+=4;
 
 foreach (0..$#args) {
-my($rtype)=$opcodes{$opcode}{RTYPES}[$_];
-my($type)=$opcodes{$opcode}{TYPES}[$_];
+my($rtype)=$opcodes{$opcode}{TYPES}[$_];
+my($type)=$real_type{$opcodes{$opcode}{TYPES}[$_]};
 if($rtype eq "I" || $rtype eq "N" || $rtype eq "P" || $rtype eq "S") {
 # its a register argument
 $args[$_]=~s/^[INPS](\d+)$/$1/i;
diff -u --new-file -r parrot.orig/build_interp_starter.pl 
parrot/build_interp_starter.pl
--- parrot.orig/build_interp_starter.pl Thu Sep 13 20:45:05 2001
+++ parrot/build_interp_starter.pl  Thu Sep 13 20:36:14 2001
@@ -1,10 +1,9 @@
 # !/usr/bin/perl -w
 use strict;
+use Parrot::Opcode;
 
 open INTERP, "> interp_guts.h" or die "Can't open interp_guts.h, $!/$^E";
 
-open OPCODES, "opcode_table" or die "Can't open opcode_table, $!/$^E";
-
 print INTERP <) {
-chomp;
-s/#.*$//;
-s/^\s+//;
-next unless $_;
-my($name) = split /\s+/;
-my $num = $count;
-$num = 0 if $name eq 'end';
-print INTERP "\tx[$num] = $name; \\\n";
-$count++ unless $name eq 'end';
+my %opcodes = Parrot::Opcode::read_ops();
+for my $name (sort {$opcodes{$a}{CODE} <=> $opcodes{$b}{CODE}} keys %opcodes) {
+print INTERP "\tx[$opcodes{$name}{CODE}] = $opcodes{$name}{FUNC}; \\\n";
 }
 print INTERP "} while (0);\n";
 
diff -u --new-file -r parrot.orig/disassemble.pl parrot/disassemble.pl
--- parrot.orig/disassemble.pl  Thu Sep 13 20:45:05 2001
+++ parrot/disassemble.pl   Thu Sep 13 20:37:47 2001
@@ -5,6 +5,7 @@
 # Turn a parrot bytecode file into text
 
 use strict;
+use Parrot::Opcode;
 
 my(%opcodes, @opcodes);
 
@@ -25,28 +26,10 @@
   s => 4,
   

Re: Patch: Common opcode_table parsing

2001-09-13 Thread Dan Sugalski

At 08:44 AM 9/13/2001 +0100, Simon Cozens wrote:
>On Thu, Sep 13, 2001 at 12:47:06AM -0700, Damien Neil wrote:
> > I *really* think we need to munge the names, though.  "end" is just
> > far too common a symbol for us to be able to pollute it.  Let's
> > learn the lesson from Perl 5: All symbols exported by the Parrot
> > code need a prefix.
>
>Aiiee. Yes, I appreciate what you're saying, but the other lessons
>from Perl 5 is that if you want to do that, you end up with either
>lots of unweildy code, or a nasty macro renaming. Which is it
>gonna be?

We're almost never going to be calling opcode functions by name in the 
core, the core build doesn't care since all that stuff is autogenerated, 
and nobody had better be calling these functions from extensions, so...

I say we munge 'em. Throw a parrot_op_ prefix on them in the preprocessing 
stage and be done with it. :)

Dan

--"it's like this"---
Dan Sugalski  even samurai
[EMAIL PROTECTED] have teddy bears and even
  teddy bears get drunk




Re: Patch: Common opcode_table parsing

2001-09-13 Thread Damien Neil

On Thu, Sep 13, 2001 at 08:44:44AM +0100, Simon Cozens wrote:
> Aiiee. Yes, I appreciate what you're saying, but the other lessons
> from Perl 5 is that if you want to do that, you end up with either
> lots of unweildy code, or a nasty macro renaming. Which is it
> gonna be?

I don't really like the Perl 5 approach of lots of macros.  I'd
rather have a short prefix attached to all symbols.  par_foo()
rather than foo() in all cases.  For very commonly used macros (the
equivalent of PUSHi()), you might relax this rule.

Look at the current source: Is it really going to be any harder to
always type par_string_length() rather than string_length(), or
Par_Allocate_Aligned() rather than Allocate_Aligned()?  The symbol
names are long enough to begin with that an extra four characters
isn't going to make much difference.

Even a single character would do a lot to eliminate symbol clashes:
Pstring_length() and PAllocate_Aligned(), for example.

(Speaking of the above, someone authoritative may want to dictate
whether functions are Upper_Case or lower_case.)

Talking just about the opcode functions, however: Will code be
calling opcode functions directly very often?  Perhaps I'm wrong,
but I'd think that ops being called outside the runops() loop will
be rare.  In fact, if ops can be embedded into a switch statement
in runops() at compile time, there won't even be any assurance that
there ARE any op functions to call.  Unwieldy op function names
shouldn't be a problem.

 - Damien



Re: Patch: Common opcode_table parsing

2001-09-13 Thread Simon Cozens

On Thu, Sep 13, 2001 at 12:47:06AM -0700, Damien Neil wrote:
> I *really* think we need to munge the names, though.  "end" is just
> far too common a symbol for us to be able to pollute it.  Let's
> learn the lesson from Perl 5: All symbols exported by the Parrot
> code need a prefix.

Aiiee. Yes, I appreciate what you're saying, but the other lessons
from Perl 5 is that if you want to do that, you end up with either
lots of unweildy code, or a nasty macro renaming. Which is it
gonna be?

Simon



Re: Patch: Common opcode_table parsing

2001-09-13 Thread Damien Neil

On Thu, Sep 13, 2001 at 08:25:46AM +0100, Simon Cozens wrote:
> On Thu, Sep 13, 2001 at 12:29:18AM -0700, Damien Neil wrote:
> > CVS changes over the past couple of days mean this patch will no
> > longer cleanly apply.  I'd be happy to update it to patch cleanly
> > against the current CVS code, but I'd like to know first if the
> > approach it takes is on the right track.
> 
> I like it, if only because reduction of common code is always good,
> and reduction of common code while everything's in a lot of flux
> is even better.

OK, I'll go through and update it again.  This patch takes out the
parsing of interp_guts.h, which I think is good for a variety of
reasons.  (Summary: it makes things simpler, and I don't think
parsing it will buy us anything at this point in time.)  Is this
OK, or should I put it back in?


> Urgh, urgh, urgh. I don't *like* the idea of munging opcode function
> names, but I equally don't like coredumps. Isn't there a way of 
> telling the linker to use our own symbols?

Actually, the problem is that the linker IS using our symbols. :>
There appears to be an "end" symbol somewhere in libc that is getting
munged by the Parrot symbol.  I think.  I didn't look deeply enough to
see exactly how things were going wrong, once I traced the core to a
symbol clash.

I *really* think we need to munge the names, though.  "end" is just
far too common a symbol for us to be able to pollute it.  Let's
learn the lesson from Perl 5: All symbols exported by the Parrot
code need a prefix.

  - Damien



Re: Patch: Common opcode_table parsing

2001-09-13 Thread Simon Cozens

On Thu, Sep 13, 2001 at 12:29:18AM -0700, Damien Neil wrote:
> CVS changes over the past couple of days mean this patch will no
> longer cleanly apply.  I'd be happy to update it to patch cleanly
> against the current CVS code, but I'd like to know first if the
> approach it takes is on the right track.

I like it, if only because reduction of common code is always good,
and reduction of common code while everything's in a lot of flux
is even better.

> Whether this patch makes it in or not, it would be nice if either
> the "end" opcode was renamed, or if something was added to munge
> the opcode function names--the CVS parrot still cores on FreeBSD
> due to a symbol conflict with libc.

Urgh, urgh, urgh. I don't *like* the idea of munging opcode function
names, but I equally don't like coredumps. Isn't there a way of 
telling the linker to use our own symbols?

Simon



Re: Patch: Common opcode_table parsing

2001-09-12 Thread Damien Neil

On Tue, Sep 11, 2001 at 12:06:52AM -0700, Damien Neil wrote:
> The following patch moves all parsing of opcode_table into a
> Parrot::Opcode module.  It also removes all parsing of interp_guts.h.
> This patch incorporates my earlier patches to prefix all C opcode
> functions with "Perl_op_".

CVS changes over the past couple of days mean this patch will no
longer cleanly apply.  I'd be happy to update it to patch cleanly
against the current CVS code, but I'd like to know first if the
approach it takes is on the right track.

Whether this patch makes it in or not, it would be nice if either
the "end" opcode was renamed, or if something was added to munge
the opcode function names--the CVS parrot still cores on FreeBSD
due to a symbol conflict with libc.

- Damien



Patch: Common opcode_table parsing

2001-09-10 Thread Damien Neil

The following patch moves all parsing of opcode_table into a
Parrot::Opcode module.  It also removes all parsing of interp_guts.h.
This patch incorporates my earlier patches to prefix all C opcode
functions with "Perl_op_".

As best I can tell, everything works the same with the patch as it
did before--the assembler and disassembler both generate identical
output, and test_prog runs as well as before.  (Or better on FreeBSD,
where it stops core dumping. :>)

  - Damien


diff -r --new-file -u parrot.orig/Parrot/Opcode.pm parrot/Parrot/Opcode.pm
--- parrot.orig/Parrot/Opcode.pmWed Dec 31 16:00:00 1969
+++ parrot/Parrot/Opcode.pm Mon Sep 10 23:52:35 2001
@@ -0,0 +1,86 @@
+package Parrot::Opcode;
+
+use strict;
+use Symbol;
+
+sub read_ops {
+my $file = @_ ? shift : "opcode_table";
+
+my $fh = gensym;
+open $fh, $file or die "$file: $!\n";
+
+my %opcode;
+my $count = 1;
+while (<$fh>) {
+   s/#.*//;
+   s/^\s+//;
+   chomp;
+   next unless $_;
+
+   my($name, @params) = split /\s+/;
+   if (@params && $params[0] =~ /^\d+$/) {
+   my $count = shift @params;
+   die "$file, line $.: opcode $name parameters don't match count\n"
+ if ($count != @params);
+   }
+
+   warn "$file, line $.: opcode $name redefined\n" if $opcode{$name};
+
+   $opcode{$name}{ARGS}  = @params;
+   $opcode{$name}{TYPES} = \@params;
+   $opcode{$name}{CODE}  = ($name eq "end") ? 0 : $count++;
+   $opcode{$name}{FUNC}  = "Parrot_op_$name";
+
+   my $num_i = () = grep {/i/} @params;
+   my $num_n = () = grep {/n/} @params;
+   $opcode{$name}{RETURN_OFFSET} = 1 + $num_i + $num_n * 2;
+}
+
+return %opcode;
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+Parrot::Opcode - Read opcode definitions
+
+=head1 SYNOPSIS
+
+  use Parrot::Opcode;
+
+  %opcodes = Parrot::Opcode::read_ops();
+
+=head1 DESCRIPTION
+
+The read_ops() function parses the Parrot opcode_table file, and
+returns the contents as a hash.  The hash key is the opcode name;
+values are hashrefs containing the following fields:
+
+=over
+
+=item CODE
+
+The opcode number.
+
+=item ARGS
+
+The opcode argument count.
+
+=item TYPES
+
+The opcode argument types, as an arrayref.
+
+=item FUNC
+
+The name of the C function implementing this op.
+
+=back
+
+read_ops() takes an optional argument: the file to read the opcode table
+from.
+
+=cut
diff -r --new-file -u parrot.orig/assemble.pl parrot/assemble.pl
--- parrot.orig/assemble.pl Mon Sep 10 14:26:08 2001
+++ parrot/assemble.pl  Mon Sep 10 23:51:34 2001
@@ -3,6 +3,7 @@
 # assemble.pl - take a parrot assembly file and spit out a bytecode file
 
 use strict;
+use Parrot::Opcode;
 
 my(%opcodes, %labels);
 
@@ -12,23 +13,7 @@
  );
 my $sizeof_packi = length(pack($pack_type{i},1024));
 
-open GUTS, "interp_guts.h";
-my $opcode;
-while () {
-next unless /\tx\[(\d+)\] = ([a-z_]+);/;
-$opcodes{$2}{CODE} = $1;
-}
-
-open OPCODES, ") {
-next if /^\s*#/;
-chomp;
-s/^\s+//;
-next unless $_;
-my ($name, $args, @types) = split /\s+/, $_;
-$opcodes{$name}{ARGS} = $args;
-$opcodes{$name}{TYPES} = [@types];
-}
+%opcodes = Parrot::Opcode::read_ops();
 
 my $pc = 0;
 my @code;
diff -r --new-file -u parrot.orig/build_interp_starter.pl 
parrot/build_interp_starter.pl
--- parrot.orig/build_interp_starter.pl Mon Sep 10 14:26:09 2001
+++ parrot/build_interp_starter.pl  Mon Sep 10 23:53:26 2001
@@ -1,10 +1,9 @@
 # !/usr/bin/perl -w
 use strict;
+use Parrot::Opcode;
 
 open INTERP, "> interp_guts.h" or die "Can't open interp_guts.h, $!/$^E";
 
-open OPCODES, "opcode_table" or die "Can't open opcode_table, $!/$^E";
-
 print INTERP <) {
-chomp;
-s/#.*$//;
-s/^\s+//;
-next unless $_;
-my($name) = split /\s+/;
-my $num = $count;
-$num = 0 if $name eq 'end';
-print INTERP "\tx[$num] = $name; \\\n";
-$count++ unless $name eq 'end';
+my %opcodes = Parrot::Opcode::read_ops();
+for my $name (sort {$opcodes{$a}{CODE} <=> $opcodes{$b}{CODE}} keys %opcodes) {
+print INTERP "\tx[$opcodes{$name}{CODE}] = $opcodes{$name}{FUNC}; \\\n";
 }
 print INTERP "} while (0);\n";
diff -r --new-file -u parrot.orig/disassemble.pl parrot/disassemble.pl
--- parrot.orig/disassemble.pl  Mon Sep 10 14:45:33 2001
+++ parrot/disassemble.pl   Mon Sep 10 23:57:36 2001
@@ -7,6 +7,7 @@
 use strict;
 
 my(%opcodes, @opcodes);
+use Parrot::Opcode;
 
 my %unpack_type;
 %unpack_type = (i => 'l',
@@ -16,28 +17,10 @@
   n => 8,
   );
 
-open GUTS, "interp_guts.h";
-my $opcode;
-while () {
-next unless /\tx\[(\d+)\] = ([a-z_]+);/;
-$opcodes{$2}{CODE} = $1;
-}
-
-open OPCODES, ") {
-next if /^\s*#/;
-s/^\s+//;
-chomp;
-next unless $_;
-my ($name, $args, @types) = split /\s+/, $_;
-next unless defined $name;
-$opcodes{$name}{ARGS} = $args;
-$opcodes{$name}{TYPES} = [@types];
-my $code = $opco