Chr & Ord, v0.4

James Mastros Thu, 25 Oct 2001 06:15:38 -0700

Hey all.
  This is version 0.4 of my chr and ord patch for parrot.  Included is a
patch, a test file, and an example.


I don't really see any major problems with this version, at least that
aren't implicit in the current Way Of Things with strings.  (That is, native
not being explicitly anything, and the encodings list being static.)

Chr and Ord aren't implemented for utf8 and utf16, only for native and
utf32.  I'd much appreciate it if sombody who knew what they were doing did
this.

The tests are woefuly incomplete.

The style of the example is poor.

    -=- James Mastros

Index: core.ops
===================================================================
RCS file: /home/perlcvs/parrot/core.ops,v
retrieving revision 1.18
diff -u -r1.18 core.ops
--- core.ops    2001/10/24 14:54:54     1.18
+++ core.ops    2001/10/25 13:38:35
@@ -991,6 +991,43 @@
     $1 = string_substr(interpreter, $2, $3, $4, &$1);
 }
 
+########################################
+
+=item B<ord>(i, s)
+=item B<ord>(i, sc)
+
+Set $1 to the codepoint of the first character in $2.
+
+=cut
+
+AUTO_OP ord(i, s|sc) {
+  $1 = string_ord($2);
+}
+
+########################################
+
+=item B<chr>(s, i)
+=item B<chr>(s, ic)
+
+Set $1 to a single-character string with the Unicode codepoint $2.
+
+=cut
+
+AUTO_OP chr(s, i|ic) {
+    $1 = string_chr(interpreter, $2, enc_utf32, &$1);
+}
+
+########################################
+
+=item B<chr>(s, i|ic, i|ic)
+
+Set $1 to a single-character string with the codepoint $2 in the encoding $3.
+
+=cut
+
+AUTO_OP chr(s, i|ic, i|ic) {
+    $1 = string_chr(interpreter, $2, $3, &$1);
+}
 
 =back
 
Index: string.c
===================================================================
RCS file: /home/perlcvs/parrot/string.c,v
retrieving revision 1.15
diff -u -r1.15 string.c
--- string.c    2001/10/22 23:34:47     1.15
+++ string.c    2001/10/25 13:38:35
@@ -168,6 +168,32 @@
     return (ENC_VTABLE(s1)->compare)(s1, s2);
 }
 
+/*=for api string string_ord
+ * get the codepoint of the first char of the string.
+ * (FIXME: Document in docs/strings.pod)
+ */
+INTVAL
+string_ord(STRING* s) {
+   return (ENC_VTABLE(s)->ord)(s);
+}
+
+/*=for api string string_chr
+ * Get a string with the first char having codepoint code, in the encoding 
+ * enc, and store it in d.  Also return d.
+ * Allocate memory for d if necessary.
+ */
+STRING*
+string_chr(struct Parrot_Interp *interpreter, INTVAL code, encoding_t enc, STRING** 
+d) {
+    STRING *dest;
+    if (!d || !*d) {
+        dest = string_make(interpreter, NULL, 0, enc, 0, 0);
+    }
+    else {
+        dest = *d;
+    }
+    return (ENC_VTABLE(dest)->chr)(code, dest);
+}
+
 /*
  * Local variables:
  * c-indentation-style: bsd
@@ -176,9 +202,4 @@
  * End:
  *
  * vim: expandtab shiftwidth=4:
-*/
-
-
-
-
-
+ */
Index: strnative.c
===================================================================
RCS file: /home/perlcvs/parrot/strnative.c,v
retrieving revision 1.19
diff -u -r1.19 strnative.c
--- strnative.c 2001/10/22 23:34:47     1.19
+++ strnative.c 2001/10/25 13:38:35
@@ -105,6 +105,32 @@
     return cmp;
 }
 
+/*=for api string_native string_native_ord
+   returns the value of the first byte of the string.
+ */
+INTVAL
+string_native_ord (STRING* s) {
+   return (INTVAL)*(char *)(s->bufstart);
+}
+
+/*=for api string_native string_native_chr
+   return a string whose first character is given by the INTVAL.
+*/
+STRING*
+string_native_chr (INTVAL code, STRING* dest) {
+   if (dest->encoding->which != enc_native) {
+       /* It is now, matey. */
+       dest->encoding = &(Parrot_string_vtable[enc_native]);
+   }
+
+   string_grow(dest, 1);
+   *(char *)dest->bufstart = (char)code;
+   dest->strlen = 1;
+   dest->bufused = 1;
+
+   return dest;
+}
+
 /*=for api string_native string_native_vtable
    return the vtable for the native string
 */
@@ -118,6 +144,8 @@
        string_native_chopn,
        string_native_substr,
        string_native_compare,
+        string_native_ord,
+       string_native_chr,
     };
     return sv;
 }
Index: strutf32.c
===================================================================
RCS file: /home/perlcvs/parrot/strutf32.c,v
retrieving revision 1.4
diff -u -r1.4 strutf32.c
--- strutf32.c  2001/10/22 23:34:47     1.4
+++ strutf32.c  2001/10/25 13:38:35
@@ -102,6 +102,32 @@
     return cmp;
 }
 
+/*=for api string_native string_utf32_ord
+   returns the value of the first byte of the string.
+ */
+INTVAL
+string_utf32_ord (STRING* s) {
+   return (INTVAL)*(utf32_t *)(s->bufstart);
+}
+
+/*=for api string_utf32 string_utf32_chr
+   return a string whose first character is given by the INTVAL.
+*/
+STRING*
+string_utf32_chr (INTVAL code, STRING* dest) {
+   if (dest->encoding->which != enc_utf32) {
+       /* It is now, matey. */
+       dest->encoding = &(Parrot_string_vtable[enc_utf32]);
+   }
+
+   string_grow(dest, 1);
+   *(utf32_t *)dest->bufstart = (utf32_t)code;
+   dest->strlen = 1;
+   dest->bufused = 4;
+
+   return dest;
+}
+
 /*=for api string_utf32 string_utf32_vtable
    return the vtable for the native string
 */
@@ -115,6 +141,8 @@
        string_utf32_chopn,
        string_utf32_substr,
        string_utf32_compare,
+       string_utf32_ord,
+       string_utf32_chr,
     };
     return sv;
 }
Index: include/parrot/string.h
===================================================================
RCS file: /home/perlcvs/parrot/include/parrot/string.h,v
retrieving revision 1.8
diff -u -r1.8 string.h
--- include/parrot/string.h     2001/10/22 23:34:48     1.8
+++ include/parrot/string.h     2001/10/25 13:38:35
@@ -36,6 +36,7 @@
 typedef STRING* (*substr_t)(STRING*, INTVAL, INTVAL, STRING*);
 typedef INTVAL (*iv_to_iv_t)(INTVAL);
 typedef INTVAL (*two_strings_to_iv_t)(STRING*, STRING*);
+typedef STRING* (*iv_string_to_string)(INTVAL, STRING*);
 
 struct string_vtable {
     encoding_t which;                   /* What sort of encoding is this? */
@@ -45,6 +46,8 @@
     string_iv_to_string_t chopn;        /* Remove n characters from the end of a 
string */
     substr_t substr;                    /* Substring operation */
     two_strings_to_iv_t compare;        /* Compare operation */
+    string_to_iv_t ord;                        /* Return the codepoint of the first 
+character of the string */
+    iv_string_to_string chr;           /* Return a single-char string with codepoint 
+of the argument */
 };
 
 struct parrot_string {
@@ -55,7 +58,7 @@
     INTVAL strlen;
     STRING_VTABLE* encoding;
     INTVAL type;
-    INTVAL lanugage;
+    INTVAL language;
 };
 
 
@@ -73,6 +76,10 @@
 string_substr(struct Parrot_Interp *interpreter, STRING*, INTVAL, INTVAL, STRING**);
 INTVAL
 string_compare(struct Parrot_Interp *, STRING*, STRING*);
+INTVAL
+string_ord(STRING*);
+STRING*
+string_chr(struct Parrot_Interp *, INTVAL, encoding_t, STRING**);
 
 /* Declarations of other functions */
 INTVAL

# I0 is the current character, I1 is current screen col.
set I0, 0x20 # Char 0x20 is ASCII space, avoids control codes
set I1, 0
loop: chr S0, I0
print "  "
gt I0, 99, nospace
print " "
nospace: print I0
print ": "
print S0
inc I0
add I1, I1, 8
cmod I2, I1, 80
ne I2, 0, nonewline
print "\n"
set I1, 0
nonewline: lt I0, 0x7F, loop
print "\n"
end

Chr & Ord, v0.4

Reply via email to