string.pasm patches the operators mentioned
The other file, 'parrot.pasm', is a miniature Parrot compiler, written
in Parrot.
The patches in the string.diff file are required to make this work.
It's currently -very- limited, due to some issues that I found with
macro processing and some problems in local labels that I found during
development.
Specifically, macros with labels cannot be expanded more than once
without the labels colliding.
The only sample test program it can compile is below (test.pasm):
---cut here---
# comment that will be ignored by parrot.pasm
print 9
# Maybe another comment here, these are ignored.
end
---cut here---
To prove that it is indeed compiling a test file, change '9' to
something like 732, and then:
../assemble.pl parrot.pasm>parrot.pbc
../test_prog parrot.pbc
../test_prog test.pbc
9
~/parrot/>
Now, of course, there are many limitations here.
For one, until macros are fixed (something I'm going to do tonight) I
can't have more than one macro invocation (like, say, sscanf) in a given
file. So, we can't scan for more than one instruction easily.
I also need to restructure the code to do one pass to collect the number
of operators, write that, then write the operator stream out.
--Jeff
<[EMAIL PROTECTED]>
#------------------------------------------------------------------------------
#
# read_file
#
# read_file STRING, FILE_NAME
#
read_file macro R, S, CHUNK_SIZE, TEMP_STRING
pushi
open I31, S, 0
$read_chunk: read TEMP_STRING, I31, CHUNK_SIZE
length I0, TEMP_STRING
eq I0, 0, $done
concat R, TEMP_STRING
eq I0, CHUNK_SIZE, $read_chunk
$done: close I31
popi
endm
#------------------------------------------------------------------------------
#
# sscanf
#
# sscanf STRING, INDEX, VALUE
#
sscanf macro STRING, INDEX, RETURN_VALUE
length I3,STRING
set I2,INDEX
$next_char_2: eq I2,I3,$done_2
ord I1,STRING,I2
lt I1,48,$done_2
gt I1,58,$done_2
sub I1,I1,48
mul RETURN_VALUE,RETURN_VALUE,10
add RETURN_VALUE,RETURN_VALUE,I1
inc I2
branch $next_char_2
$done_2:
endm
#------------------------------------------------------------------------------
write_magic macro FH
write FH,20010401
endm
#------------------------------------------------------------------------------
write_print_ic macro FH,IC
write FH,27
write FH,IC
endm
write_end macro FH
write FH,0
endm
#------------------------------------------------------------------------------
parse_line macro LINE, FILE, TS
eq LINE, "end", $write_end
substr TS, LINE, 0, 5
eq TS, "print", $write_print
branch $done_parsing
$write_end: write_end FILE
branch $done_parsing
$write_print:
sscanf LINE, 6, I28
# ord I28, LINE, 6
# dec I28, 48
write_print_ic FILE, I28
branch $done_parsing
$done_parsing:
endm
split_file macro R, D, TEMP_FILE
open TEMP_FILE,"test.pbc"
write_magic TEMP_FILE
write TEMP_FILE,0
write TEMP_FILE,4
write TEMP_FILE,0
#
# Unfortunate problem
#
write TEMP_FILE,12
set I31,0
length I30,R
$next_char: substr S31,R,I31,1
eq S31,"\n",$end_of_line
concat D,S31
inc I31
eq I31,I30,$end_split
branch $next_char
$end_of_line: parse_line D,TEMP_FILE,S2
set D,""
inc I31
branch $next_char
$end_split: close TEMP_FILE
endm
#------------------------------------------------------------------------------
#
# Main
#
set S0,""
read_file S0,"test.pasm",8,S31
set S1,""
split_file S0,S1,I29
end
#------------------------------------------------------------------------------
diff -ru parrot_orig/core.ops parrot/core.ops
--- parrot_orig/core.ops Tue Nov 6 11:14:25 2001
+++ parrot/core.ops Sat Nov 10 17:55:47 2001
@@ -141,6 +141,26 @@
########################################
+=item B<ord>(i,s|sc)
+
+=item B<ord>(i,s|sc,i|ic)
+
+Set $1 to the appropriate character in string $2.
+Selects character $3 if $3 is present.
+
+=cut
+
+AUTO_OP ord(i,s|sc) {
+ $1 = string_ord($2,0);
+}
+
+AUTO_OP ord(i,s|sc,i|ic) {
+ $1 = string_ord($2,$3);
+}
+
+
+########################################
+
=item B<print>(i|ic)
=item B<print>(n|nc)
@@ -196,12 +216,33 @@
STRING *s;
INTVAL len = $3;
- string_destroy($1);
+ s = $1;
+
tmp = malloc(len + 1);
- read($2, tmp, len);
- s = string_make(interpreter, tmp, len, 0, 0, 0);
- $1 = s;
- free(tmp);
+ len = read($2,tmp,len);
+ tmp[len]=0;
+ if(len==0) {
+ free(tmp); /* Clear up the potential memory leak */
+ if(s && s->bufstart != NULL) {
+ free(s->bufstart); /* Free the old allocated string. */
+ }
+ s->bufstart = NULL;
+ s->buflen = 0;
+ string_compute_strlen(s);
+ s->strlen = 0;
+ }
+ else {
+ if(s && s->bufstart != NULL) {
+ free(s->bufstart);
+ s->bufstart = tmp;
+ s->buflen = len;//strlen(tmp);
+ string_compute_strlen(s);
+ }
+ else {
+ $1 = string_make(interpreter, tmp, strlen(tmp), 0, 0, 0);
+ free(tmp);
+ }
+ }
}
diff -ru parrot_orig/include/parrot/exceptions.h parrot/include/parrot/exceptions.h
--- parrot_orig/include/parrot/exceptions.h Wed Oct 31 17:51:32 2001
+++ parrot/include/parrot/exceptions.h Fri Nov 9 20:06:16 2001
@@ -17,6 +17,7 @@
#define NO_REG_FRAMES 1
#define SUBSTR_OUT_OF_STRING 1
+#define ORD_OUT_OF_STRING 1
#define MALFORMED_UTF8 1
#define MALFORMED_UTF16 1
#define MALFORMED_UTF32 1
diff -ru parrot_orig/include/parrot/string.h parrot/include/parrot/string.h
--- parrot_orig/include/parrot/string.h Wed Oct 31 17:51:32 2001
+++ parrot/include/parrot/string.h Fri Nov 9 20:13:52 2001
@@ -45,6 +45,8 @@
/* Declarations of other functions */
INTVAL
string_length(STRING*);
+INTVAL
+string_ord(STRING* s, INTVAL index);
void
string_grow(STRING* s, INTVAL newsize);
void
diff -ru parrot_orig/string.c parrot/string.c
--- parrot_orig/string.c Wed Oct 31 17:51:31 2001
+++ parrot/string.c Sat Nov 10 18:16:27 2001
@@ -83,6 +83,33 @@
return s->strlen;
}
+/*=for api string string_ord
+ * return the length of the string
+ */
+INTVAL
+string_ord(STRING* s, INTVAL index) {
+ if(s==NULL) {
+ INTERNAL_EXCEPTION(ORD_OUT_OF_STRING,
+ "Cannot get character of empty string");
+ }
+ else {
+ int len = string_length(s);
+ if(index < 0) {
+ INTERNAL_EXCEPTION(ORD_OUT_OF_STRING,
+ "Cannot get character at negative index");
+ }
+ else if(index > (len - 1)) {
+ INTERNAL_EXCEPTION(ORD_OUT_OF_STRING,
+ "Cannot get character past end of string");
+ }
+ else {
+ char *buf = s->bufstart;
+ return buf[index];
+ }
+ }
+ return -1;
+}
+
/*=for api string string_copy
* create a copy of the argument passed in
*/
@@ -175,13 +202,19 @@
*/
STRING*
string_concat(struct Parrot_Interp *interpreter, STRING* a, STRING* b, INTVAL flags) {
- if (a->type != b->type || a->encoding != b->encoding) {
- b = string_transcode(interpreter, b, a->encoding, a->type, NULL);
+ if(a != NULL) {
+ if (a->type != b->type || a->encoding != b->encoding) {
+ b = string_transcode(interpreter, b, a->encoding, a->type, NULL);
+ }
+ string_grow(a, a->strlen + b->strlen);
+ mem_sys_memcopy((void*)((ptrcast_t)a->bufstart + a->bufused), b->bufstart,
+b->bufused);
+ a->strlen = a->strlen + b->strlen;
+ a->bufused = a->bufused + b->bufused;
+ }
+ else {
+ return string_make(interpreter,
+ b->bufstart,b->buflen,b->encoding,flags,b->type);
}
- string_grow(a, a->strlen + b->strlen);
- mem_sys_memcopy((void*)((ptrcast_t)a->bufstart + a->bufused), b->bufstart,
b->bufused);
- a->strlen = a->strlen + b->strlen;
- a->bufused = a->bufused + b->bufused;
return a;
}
diff -ru parrot_orig/t/op/string.t parrot/t/op/string.t
--- parrot_orig/t/op/string.t Tue Oct 16 14:35:04 2001
+++ parrot/t/op/string.t Sat Nov 10 17:52:02 2001
@@ -1,6 +1,6 @@
#! perl -w
-use Parrot::Test tests => 24;
+use Parrot::Test tests => 36;
output_is( <<'CODE', <<OUTPUT, "set_s_sc" );
set S4, "JAPH\n"
@@ -127,6 +127,29 @@
length 21
OUTPUT
+output_is( <<'CODE', '', "2-param concat, null onto null" );
+ concat S0,S0
+ end
+CODE
+
+output_is( <<'CODE', <<OUTPUT, '2-param concat, "foo1" onto null' );
+ concat S0,"foo1"
+ print S0
+ print "\n"
+ end
+CODE
+foo1
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, '2-param concat, "foo2" onto null' );
+ set S1,"foo2"
+ concat S0,S1
+ print S0
+ print "\n"
+ end
+CODE
+foo2
+OUTPUT
output_is( <<'CODE', <<OUTPUT, "concat" );
set S1, "fish"
@@ -140,6 +163,7 @@
fishbone
OUTPUT
+
output_is(<<"CODE", <<'OUTPUT', "clears");
@{[ set_str_regs( sub {"BOO $_[0]\\n"} ) ]}
clears
@@ -309,7 +333,64 @@
foo
OUTPUT
+output_is(<<'CODE', 'Cannot get character of empty string','ord on an empty string -
+2-param form');
+ ord I0,S0
+ print I0
+ end
+CODE
+
+output_is(<<'CODE', 'Cannot get character of empty string','3-param ord on an empty
+string');
+ ord I0,S0,I0
+ print I0
+ end
+CODE
+
+output_is(<<'CODE', 'Cannot get character of empty string','3-param ord on an empty
+string - before start');
+ ord I0,S0,-1
+ print I0
+ end
+CODE
+
+output_is(<<'CODE', 'Cannot get character of empty string','3-param ord on an empty
+string - after end');
+ ord I0,S0,1
+ print I0
+ end
+CODE
+
+output_is(<<'CODE', 48, '2-param ord on number');
+ set S0,"0"
+ ord I0,S0
+ print I0
+ end
+CODE
+
+output_is(<<'CODE', 'Cannot get character at negative index', '3-param ord on number,
+before beginning');
+ set S0,"0"
+ ord I0,S0,-1
+ print I0
+ end
+CODE
+
+output_is(<<'CODE', 'Cannot get character past end of string', '3-param ord on
+number, after end');
+ set S0,"0"
+ ord I0,S0,1
+ print I0
+ end
+CODE
+output_is(<<'CODE', ord('a'), '2-param ord on string, index 0');
+ set S0,"absolute"
+ ord I0,S0
+ print I0
+ end
+CODE
+
+output_is(<<'CODE', ord('b'), '3-param ord on string, index 1');
+ set S0,"absolute"
+ ord I0,S0,1
+ print I0
+ end
+CODE
# Set all string registers to values given by &$_[0](reg num)
sub set_str_regs {