This patch (which is pretty big) does: * Changes the opcode_table file to provide additional information about the operands. Case shouldn't be a problem since that data never becomes a C symbol [this is pretty much as before] * Padding errors solved: assemble.pl and bytecode.c were padding the constants incorrectly. It should have been 4-(size % 4), not just (size % 4). It is now fixed in both places. * assembler has less special cases, and should be easier to hang error checking on * disassembler dumps constant table and the format is a bit prettier, including register names, etc. Test2.pbc dumps as this: # Constants: 1 entries (32 bytes) # ID Flags Encoding Type Size Data 0000: 00000000 00000000 00000000 0000000b Hello World # Code Section 00000000: set_i_ic I2, 1 0000000c: set_i_ic I1, 0 00000018: set_s_sc S1, [string 0000] 00000024: eq_i_ic I1, I2, 00000060, 00000038 00000038: length_s_i S1, I1 00000044: print_s S1 0000004c: chopn_s_ic S1, 1 00000058: branch_ic 00000024 00000060: end Let me know what you guys think! Brian [Crap, there's some wordwrapping below. Too bad you can plug emacs into evolution :) ] Index: assemble.pl =================================================================== RCS file: /home/perlcvs/parrot/assemble.pl,v retrieving revision 1.6 diff -u -r1.6 assemble.pl --- assemble.pl 2001/09/10 21:26:08 1.6 +++ assemble.pl 2001/09/11 03:14:32 @@ -9,7 +9,16 @@ my %pack_type; %pack_type = (i => 'l', n => 'd', - ); + ); + +my %real_type=('i'=>'i', + 'n'=>'n', + 'N'=>'i', + 'I'=>'i', + 'S'=>'i', + 's'=>'i', + 'D'=>'i'); + my $sizeof_packi = length(pack($pack_type{i},1024)); open GUTS, "interp_guts.h"; @@ -26,8 +35,11 @@ s/^\s+//; next unless $_; my ($name, $args, @types) = split /\s+/, $_; + my @rtypes=@types; + @types=map { $_ = $real_type{$_}} @types; $opcodes{$name}{ARGS} = $args; $opcodes{$name}{TYPES} = [@types]; + $opcodes{$name}{RTYPES}=[@rtypes]; } my $pc = 0; @@ -65,23 +77,17 @@ die "wrong arg count--got ". scalar @args. " needed " . $opcodes{$opcode}{ARGS}; } - $args[0] = fixup($args[0]) - if $opcode eq "branch_ic" and $args[0] =~ /[a-zA-Z]/; - -# if ($opcode eq "eq_i_ic" or $opcode eq "lt_i_ic") { - if ($opcode =~ /^(eq|ne|lt|le|gt|ge)_i_ic$/) { - $args[2] = fixup($args[2]) if $args[2] =~ /[a-zA-Z]/; - $args[3] = fixup($args[3]) if $args[3] =~ /[a-zA-Z]/; - } - if ($opcode eq "if_i_ic") { - $args[1] = fixup($args[1]) if $args[1] =~ /[a-zA-Z]/; - $args[2] = fixup($args[2]) if $args[2] =~ /[a-zA-Z]/; - } - print pack "l", $opcodes{$opcode}{CODE}; foreach (0..$#args) { - $args[$_] =~ s/^[INPS]?(\d+)$/$1/i; - my $type = $pack_type{$opcodes{$opcode}{TYPES}[$_]}; + my($rtype)=$opcodes{$opcode}{RTYPES}[$_]; + my($type)=$opcodes{$opcode}{TYPES}[$_]; + if($rtype eq "I" || $rtype eq "N" || $rtype eq "P" || $rtype eq "S") { + # its a register argument + $args[$_]=~s/^[INPS](\d+)$/$1/i; + } elsif($rtype eq "D") { + # a destination + $args[$_]=fixup($args[$_]); + } print pack $type, $args[$_]; } $pc += 1+@args; @@ -112,7 +118,10 @@ for (@constants) { $size += 4*$sizeof_packi; $size += length($_); - $size += length($_) % $sizeof_packi; # Padding + my($pad)=length($_) % $sizeof_packi; + if($pad) { + $size+=$sizeof_packi-$pad; + } } $size += $sizeof_packi if @constants; # That's for the number of constants @@ -127,6 +136,9 @@ print pack($pack_type{i},0) x 3; # Flags, encoding, type print pack($pack_type{i},length($_)); # Strlen followed by that many bytes. print $_; - print "\0" x (length($_) % $sizeof_packi); # Padding; + my $pad=(length($_) % $sizeof_packi); + if($pad) { + print "\0" x ($sizeof_packi-(length($_) % $sizeof_packi)); # Padding; + } } } Index: bytecode.c =================================================================== RCS file: /home/perlcvs/parrot/bytecode.c,v retrieving revision 1.4 diff -u -r1.4 bytecode.c --- bytecode.c 2001/09/10 21:47:26 1.4 +++ bytecode.c 2001/09/11 03:14:33 @@ -79,6 +79,7 @@ IV encoding = GRAB_IV(program_code); IV type = GRAB_IV(program_code); IV buflen = GRAB_IV(program_code); + int pad; len -= 4 * sizeof(IV); @@ -87,9 +88,11 @@ len -= buflen; /* Padding */ - if (buflen % sizeof(IV)) { - len -= buflen % sizeof(IV); - (char*)*program_code += buflen % sizeof(IV); + pad=buflen % sizeof(IV); + if(pad) { + pad=sizeof(IV)-pad; + len -= pad; + (char*)*program_code += pad; } num--; if (len < 0 || (len > 0 && num == 0)) { Index: disassemble.pl =================================================================== RCS file: /home/perlcvs/parrot/disassemble.pl,v retrieving revision 1.3 diff -u -r1.3 disassemble.pl --- disassemble.pl 2001/09/10 21:45:33 1.3 +++ disassemble.pl 2001/09/11 03:14:33 @@ -8,14 +8,25 @@ my(%opcodes, @opcodes); -my %unpack_type; -%unpack_type = (i => 'l', - n => 'd', - ); + +my %unpack_type = (i => 'l', + I => 'l', + n => 'd', + N => 'l', + D => 'l', + S => 'l', + s => 'l', + ); my %unpack_size = (i => 4, n => 8, + I => 4, + N => 4, + D => 4, + S => 4, + s => 4, ); + open GUTS, "interp_guts.h"; my $opcode; while (<GUTS>) { @@ -50,17 +61,54 @@ my $constants = unpack('l', <>); # Skip for now - +if($constants) { + my $count=unpack('l', <>); + print "# Constants: $count entries ($constants bytes)\n"; + print "# ID Flags Encoding Type Size Data\n"; + foreach (1..$count) { + my $flags=unpack('l',<>); + my $encoding=unpack('l',<>); + my $type=unpack('l',<>); + my $size=unpack('l',<>); + my $data=""; + while(length($data) < $size) { + $data.=<>; + } + # strip off any padding nulls + $data=substr($data,0,$size); + printf("%04x: %08x %08x %08x %08x %s\n", $_-1,$flags,$encoding,$type,$size,$data); + } +} +print "# Code Section\n"; +my $offset=0; while (<>) { my $code = unpack 'l', $_; my $args = $opcodes[$code]{ARGS}; - print $opcodes[$code]{NAME}; + my $op_offset=$offset; + print sprintf("%08x: ",$offset),$opcodes[$code]{NAME},"\t"; + my @args=(); + $offset+=4; if ($args) { foreach (1..$args) { - local $/ = \$unpack_size{$opcodes[$code]{TYPES}[$_-1]}; + my $type=$opcodes[$code]{TYPES}[$_-1]; + local $/ = \$unpack_size{$type}; + $offset+=$unpack_size{$type}; my $data = <> || die("EOF when expecting argument!\n"); - print " ", unpack $unpack_type{$opcodes[$code]{TYPES}[$_-1]}, $data;+ if($type eq "I" || $type eq "N" || $type eq "P" || $type eq "S") { + # register + push(@args,$type.unpack($unpack_type{$type},$data)); + } elsif($type eq "D") { + # destination address + +push(@args,sprintf("%08x",$op_offset+unpack($unpack_type{$type},$data)*4)); + } elsif($type eq "s") { + # string constant + push(@args,sprintf("[string %04x]",unpack($unpack_type{$type},$data))); + + } else { + # constant + push(@args,unpack $unpack_type{$type}, $data); + } } } - print "\n"; + print join(", ",@args),"\n"; } Index: opcode_table =================================================================== RCS file: /home/perlcvs/parrot/opcode_table,v retrieving revision 1.6 diff -u -r1.6 opcode_table --- opcode_table 2001/09/10 21:26:09 1.6 +++ opcode_table 2001/09/11 03:14:33 @@ -11,70 +11,80 @@ # not the type of the register or anything. So N3 is still an i, since that # 3 specifying the register should be packed as an integer. +# Revised arg types: +# i Integer constant +# I Integer register +# n Numeric constant +# N Numeric register +# s String constant? +# S String register +# D Destination + + # This must be opcode zero end 0 # Integer ops -set_i_ic 2 i i -set_i 2 i i -add_i 3 i i i -sub_i 3 i i i -mul_i 3 i i i -div_i 3 i i i -inc_i 1 i -inc_i_ic 2 i i -dec_i 1 i -dec_i_ic 2 i i +set_i_ic 2 I i +set_i 2 I I +add_i 3 I I I +sub_i 3 I I I +mul_i 3 I I I +div_i 3 I I I +inc_i 1 I +inc_i_ic 2 I i +dec_i 1 I +dec_i_ic 2 I i # NUM ops -set_n_nc 2 i n -add_n 3 i i i -sub_n 3 i i i -mul_n 3 i i i -div_n 3 i i i -inc_n 1 i -inc_n_nc 2 i n -dec_n 1 i -dec_n_nc 2 i n +set_n_nc 2 N n +add_n 3 N N N +sub_n 3 N N N +mul_n 3 N N N +div_n 3 N N N +inc_n 1 N +inc_n_nc 2 N n +dec_n 1 N +dec_n_nc 2 N n # String ops -set_s_sc 2 i i -print_s 1 i -length_s_i 2 i i -chopn_s_ic 2 i i +set_s_sc 2 S s +print_s 1 S +length_s_i 2 S I +chopn_s_ic 2 S i # Comparators -eq_i_ic 4 i i i i -eq_n_ic 4 i i i i -ne_i_ic 4 i i i i -lt_i_ic 4 i i i i -le_i_ic 4 i i i i -gt_i_ic 4 i i i i -ge_i_ic 4 i i i i +eq_i_ic 4 I I D D +eq_n_ic 4 N N D D +ne_i_ic 4 I I D D +lt_i_ic 4 I I D D +le_i_ic 4 I I D D +gt_i_ic 4 I I D D +ge_i_ic 4 I I D D # Flow control -jump_i 1 i -branch_ic 1 i -if_i_ic 3 i i i -if_n_ic 3 i i i +jump_i 1 D +branch_ic 1 D +if_i_ic 3 I D D +if_n_ic 3 N D D # Convertors -iton_n_i 2 i i -ntoi_i_n 2 i i +iton_n_i 2 N I +ntoi_i_n 2 I N # Miscellaneous and debugging ops -time_i 1 i -print_i 1 i -time_n 1 i -print_n 1 i +time_i 1 I +print_i 1 I +time_n 1 N +print_n 1 N noop 0 # Register ops Index: process_opfunc.pl =================================================================== RCS file: /home/perlcvs/parrot/process_opfunc.pl,v retrieving revision 1.3 diff -u -r1.3 process_opfunc.pl --- process_opfunc.pl 2001/09/10 21:26:09 1.3 +++ process_opfunc.pl 2001/09/11 03:14:34 @@ -40,6 +40,17 @@ $opcode{$2}{OPNUM} = $1; } + +my %psize = (i => 1, + n => 2, + I => 1, + N => 1, + D => 1, + S => 1, + s => 1, + ); + + open OPCODE, "opcode_table" or die "Can't open opcode_table, $!/$^E"; while (<OPCODE>) { s/#.*//; @@ -49,10 +60,13 @@ my ($name, $params, @params) = split /\s+/; $opcode{$name}{PARAM_COUNT} = $params; $opcode{$name}{PARAM_ARRAY} = \@params; + + my $psize=0; + foreach (@params) { + $psize+=$psize{$_}; + } - my $num_i = () = grep {/i/} @params; - my $num_n = () = grep {/n/} @params; - $opcode{$name}{RETURN_OFFSET} = 1 + $num_i + $num_n * 2; + $opcode{$name}{RETURN_OFFSET} = 1 + $psize; my $count = 1; $opcode{$name}{PARAMETER_SUB} = ["", map {if ($_ eq "n") {