All --
While I was working on understanding jump_i, I was wishing I could
see a trace of where the PC was going and what it was executing.
(BTW, the jump.pasm test works now that I hard-coded the offsets
as *word* counts not *byte* counts -- D'Oh!)
I was also concerned that even though I was probably jumping out of
the bytecode block I wasn't getting an error message from the
interpreter.
The patch I'm attaching here does the following things:
* Passes around the bytecode block size to all the functions that
work with it. As we "strip off" the leading stuff to get to the
actual code to execute, the size is also decremented.
* Generates BUILD_NAME_TABLE and BUILD_ARG_TABLE macros via
build_interp_starter.pl. These are used later when tracing ops.
* Additional bounds checks in the runops loop. I know we don't
like to add code to the inner loop, but allowing bogus or evil
bytecode to jump to arbitrary locations outside the bytecode
block is most likely a Bad Thing (TM).
* If the TRACE_OPS symbol is defined, then additional code will
be enabled which prints out the PC, OP and args for each operation
the interpreter executes.
NOTES:
* For testing, I hard-coded #define TRACE_OPS. I'm not comfortable
enough with the config stuff to muck with it. I'd like to be
able to enable this stuff with a config option (presuming Dan
et al. like the idea of me committing this modulo some
refinements). Can someone send me a patch with the right
incantations so I can try it out?
* The bounds checking actually doesn't go quite far enough. Here
are two other things that need consideration:
* Suppose we jump to the last word in the bytecode, and there
is an op there that expects arguments... BOOM
* Suppose the bytecode contains opcodes for out-of-range ops...
BOOM.
* Now, we could consider verifying the bytecode after reading
it and before interpreting it to see that these things don't
occur. BUT, I don't think there is anything to stop someone
from producing a perfectly fine bytecode stream that has
data embedded in the middle or at the end that is never
intended to be executed (unless we make a rule that says
Thou Shalt Not Do That).
I'd like to commit this if I can get the Config support worked out.
I think it would be a big help in debugging new ops, especially as
we start working on subroutines and such.
Any thoughts?
Regards,
-- Gregor
_____________________________________________________________________
/ perl -e 'srand(-2091643526); print chr rand 90 for (0..4)' \
Gregor N. Purdy [EMAIL PROTECTED]
Focus Research, Inc. http://www.focusresearch.com/
8080 Beckett Center Drive #203 513-860-3570 vox
West Chester, OH 45069 513-860-3579 fax
\_____________________________________________________________________/
? op.diff
? trace_ops.diff
? t/inc.pasm
? t/jumpsub.pasm
? t/substr.pasm
Index: build_interp_starter.pl
===================================================================
RCS file: /home/perlcvs/parrot/build_interp_starter.pl,v
retrieving revision 1.6
diff -u -r1.6 build_interp_starter.pl
--- build_interp_starter.pl 2001/09/15 16:05:40 1.6
+++ build_interp_starter.pl 2001/09/17 14:17:48
@@ -26,7 +26,39 @@
}
print INTERP "} while (0);\n";
+
+#
+# BUILD_NAME_TABLE macro:
+#
+
+print INTERP <<CONST;
+#define BUILD_NAME_TABLE(x) do { \\
+CONST
+
+for my $name (sort {$opcodes{$a}{CODE} <=> $opcodes{$b}{CODE}} keys %opcodes) {
+ print INTERP "\tx[$opcodes{$name}{CODE}] = \"$name\"; \\\n";
+}
+print INTERP "} while (0);\n";
+
+
+#
+# BUILD_ARG_TABLE macro:
+#
+
+print INTERP <<CONST;
+#define BUILD_ARG_TABLE(x) do { \\
+CONST
+
+for my $name (sort {$opcodes{$a}{CODE} <=> $opcodes{$b}{CODE}} keys %opcodes) {
+ print INTERP "\tx[$opcodes{$name}{CODE}] = $opcodes{$name}{ARGS}; \\\n";
+}
+print INTERP "} while (0);\n";
+
+
+#
# Spit out the DO_OP function
+#
+
print INTERP <<EOI;
#define DO_OP(w,x,y,z) do { \\
Index: bytecode.c
===================================================================
RCS file: /home/perlcvs/parrot/bytecode.c,v
retrieving revision 1.9
diff -u -r1.9 bytecode.c
--- bytecode.c 2001/09/16 01:45:50 1.9
+++ bytecode.c 2001/09/17 14:17:49
@@ -49,7 +49,8 @@
*/
static int
-check_magic(void** program_code) {
+check_magic(void** program_code, long* program_size) {
+ program_size -= sizeof(IV);
return (GRAB_IV(program_code) == PARROT_MAGIC);
}
@@ -69,12 +70,14 @@
*/
static void
-read_constants_table(void** program_code)
+read_constants_table(void** program_code, long* program_size)
{
IV len = GRAB_IV(program_code);
IV num;
IV i = 0;
+ *program_size -= sizeof(IV);
+
Parrot_num_string_constants = len;
if (len == 0) {
return;
@@ -82,6 +85,7 @@
num = GRAB_IV(program_code);
len -= sizeof(IV);
+ *program_size -= sizeof(IV);
Parrot_string_constants = mem_allocate_aligned(num * sizeof(STRING*));
@@ -93,10 +97,12 @@
int pad;
len -= 4 * sizeof(IV);
+ *program_size -= 4 * sizeof(IV);
Parrot_string_constants[i++] = string_make(*program_code /* ouch */, buflen,
encoding, flags, type);
(char*)*program_code += buflen;
len -= buflen;
+ *program_size -= buflen;
/* Padding */
pad=buflen % sizeof(IV);
@@ -104,6 +110,7 @@
pad=sizeof(IV)-pad;
len -= pad;
(char*)*program_code += pad;
+ *program_size -= pad;
}
num--;
if (len < 0 || (len > 0 && num == 0)) {
@@ -129,11 +136,13 @@
*/
static void
-read_fixup_table(void** program_code)
+read_fixup_table(void** program_code, long* program_size)
{
IV len = GRAB_IV(program_code);
+ *program_size -= sizeof(IV);
/* For now, just skip over it */
((IV*)*program_code) += len;
+ *program_size -= len;
}
/*
@@ -152,15 +161,15 @@
*/
void *
-init_bytecode(void* program_code)
+init_bytecode(void* program_code, long* program_size)
{
- if (!check_magic(&program_code)) {
+ if (!check_magic(&program_code, program_size)) {
printf("This isn't Parrot bytecode!\n");
exit(1);
}
- read_fixup_table(&program_code);
- read_constants_table(&program_code);
+ read_fixup_table(&program_code, program_size);
+ read_constants_table(&program_code, program_size);
return program_code;
}
Index: bytecode.h
===================================================================
RCS file: /home/perlcvs/parrot/bytecode.h,v
retrieving revision 1.4
diff -u -r1.4 bytecode.h
--- bytecode.h 2001/09/16 01:45:50 1.4
+++ bytecode.h 2001/09/17 14:17:49
@@ -14,7 +14,7 @@
#define PARROT_BYTECODE_H_GUARD
void*
-init_bytecode(void* program_code);
+init_bytecode(void* program_code, long* program_size);
IV Parrot_num_string_constants;
VAR_SCOPE STRING** Parrot_string_constants;
Index: interpreter.c
===================================================================
RCS file: /home/perlcvs/parrot/interpreter.c,v
retrieving revision 1.12
diff -u -r1.12 interpreter.c
--- interpreter.c 2001/09/17 12:56:55 1.12
+++ interpreter.c 2001/09/17 14:17:49
@@ -13,18 +13,28 @@
#include "parrot/parrot.h"
#include "parrot/interp_guts.h"
+#ifdef TRACE_OPS
+char *op_names[2048];
+int op_args[2048];
+#endif /* TRACE_OPS */
+
/*=for api interpreter runops
* run parrot operations until the program is complete
*/
void
-runops (struct Parrot_Interp *interpreter, IV *code) {
+runops (struct Parrot_Interp *interpreter, IV *code, IV code_size) {
/* Move these out of the inner loop. No need to redeclare 'em each
time through */
IV *(*func)();
void **temp;
+ IV *code_start;
+
+#ifdef TRACE_OPS
+ int i;
+#endif /* TRACE_OPS */
if (Parrot_num_string_constants == 0) {
- printf("Warning: Bytecode does not include opcode table fingerprint!\n");
+ fprintf(stderr, "Warning: Bytecode does not include opcode table
+fingerprint!\n");
} else {
const char * fp_data;
IV fp_len;
@@ -33,15 +43,43 @@
fp_len = Parrot_string_constants[0]->buflen;
if (strncmp(OPCODE_FINGERPRINT, fp_data, fp_len)) {
- printf("Error: Opcode table fingerprint in bytecode does not match
interpreter!\n");
- printf(" Bytecode: %*s\n", -fp_len, fp_data);
- printf(" Interpreter: %s\n", OPCODE_FINGERPRINT);
+ fprintf(stderr, "Error: Opcode table fingerprint in bytecode does not
+match interpreter!\n");
+ fprintf(stderr, " Bytecode: %*s\n", -fp_len, fp_data);
+ fprintf(stderr, " Interpreter: %s\n", OPCODE_FINGERPRINT);
exit(1);
}
}
+
+ code_start = code;
- while (*code) {
+#ifdef TRACE_OPS
+ fprintf(stderr, "PC=%ld; OP=%ld (%s)\n", code - code_start, *code,
+op_names[*code]);
+#endif /* TRACE_OPS */
+
+ while (code >= code_start && code < (code_start + code_size) && *code) {
DO_OP(code, temp, func, interpreter);
+
+#ifdef TRACE_OPS
+ if (code >= code_start && code < (code_start + code_size)) {
+ fprintf(stderr, "PC=%ld; OP=%ld (%s)", code - code_start, *code,
+op_names[*code]);
+ if (op_args[*code]) {
+ fprintf(stderr, "; ARGS=(");
+ for(i = 0; i < op_args[*code]; i++) {
+ if (i) { fprintf(stderr, ", "); }
+ fprintf(stderr, "%ld", *(code + i + 1));
+ }
+ fprintf(stderr, ")");
+ }
+ fprintf(stderr, "\n");
+ } else {
+ fprintf(stderr, "PC=%ld; OP=<err>\n", code - code_start);
+ }
+#endif /* TRACE_OPS */
+ }
+
+ if (code < code_start || code >= (code_start + code_size)) {
+ fprintf(stderr, "Error: Control left bounds of byte-code block (now at
+location %d)!\n", code - code_start);
+ exit(1);
}
}
@@ -111,6 +149,11 @@
BUILD_TABLE(foo);
interpreter->opcode_funcs = (void*)foo;
+
+#ifdef TRACE_OPS
+ BUILD_NAME_TABLE(op_names);
+ BUILD_ARG_TABLE(op_args);
+#endif /* TRACE_OPS */
}
/* In case the I/O system needs something */
Index: interpreter.h
===================================================================
RCS file: /home/perlcvs/parrot/interpreter.h,v
retrieving revision 1.6
diff -u -r1.6 interpreter.h
--- interpreter.h 2001/09/16 01:45:51 1.6
+++ interpreter.h 2001/09/17 14:17:49
@@ -39,7 +39,7 @@
make_interpreter();
void
-runops(struct Parrot_Interp *, IV *);
+runops(struct Parrot_Interp *, IV *, IV);
#endif
Index: test_main.c
===================================================================
RCS file: /home/perlcvs/parrot/test_main.c,v
retrieving revision 1.8
diff -u -r1.8 test_main.c
--- test_main.c 2001/09/16 22:05:21 1.8
+++ test_main.c 2001/09/17 14:17:49
@@ -34,7 +34,7 @@
/* If we got only the program name, run the test program */
if (argc == 1) {
- runops(interpreter, opcodes);
+ runops(interpreter, opcodes, sizeof(opcodes));
}
else if (argc == 2 && !strcmp(argv[1], "-s")) { /* String tests */
STRING *s = string_make("foo", 3, enc_native, 0, 0);
@@ -60,8 +60,10 @@
/* Otherwise load in the program they gave and try that */
else {
void *program_code;
+ long program_size;
struct stat file_stat;
int fd;
+
if (stat(argv[1], &file_stat)) {
printf("can't stat %s, code %i\n", argv[1], errno);
return 1;
@@ -72,20 +74,23 @@
return 1;
}
+ program_size = file_stat.st_size;
+
#ifndef HAS_HEADER_SYSMMAN
- program_code = mem_sys_allocate(file_stat.st_size);
- _read(fd, program_code, file_stat.st_size);
+ program_code = mem_sys_allocate(program_size);
+ _read(fd, program_code, program_size);
#else
- program_code = mmap(0, file_stat.st_size, PROT_READ, MAP_SHARED, fd,
0);
+ program_code = mmap(0, program_size, PROT_READ, MAP_SHARED, fd, 0);
#endif
+
if (!program_code) {
printf("Can't mmap, code %i\n", errno);
return 1;
}
- program_code = init_bytecode(program_code);
+ program_code = init_bytecode(program_code, &program_size);
- runops(interpreter, program_code);
+ runops(interpreter, program_code, program_size);
}
return 0;