I needed to create a Match PMC object for holding the match groups
(parenthesized expressions and capturing rules) from a regex match.
Unfortunately, it works by using another new PMC type, the MatchRange
PMC, to signal that an element of its hashtable should be interpreted
specially (as a substring of the input string). One PMC knowing about
another currently means they need to be static PMCs, not dynamic.
(AFAIK) So this is the patch of what I am currently using. I cannot
guarantee it will actually be useful for any other regex implementors,
so I feel uncomfortable committing it myself. (OTOH, if someone needs
something different, they can just add it as a different name.) The
point is, this is something I need for my stuff and the future of
languages/regex is with some version of it, so I can't commit those
changes without this. Although I fully expect the Match PMC will need
to be substantially beefed up to become a full grammar object (or
something...), this is base functionality that it needs to start with.

With these two PMCs, I can construct a match object containing the
hypotheticals $1, $2, etc., as well as a full parse tree comprised of
nested match objects. This does *not* handle saving and restoring
previous hypothetical values, as is needed in the case of

 (a)+b

In my compiler, that is handled by the compiled engine code.
Index: classes/match.pmc
===================================================================
RCS file: classes/match.pmc
diff -N classes/match.pmc
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ classes/match.pmc   17 Aug 2004 17:02:01 -0000
@@ -0,0 +1,205 @@
+/*
+Copyright: 2004 The Perl Foundation.  All Rights Reserved.
+$Id$
+
+=head1 NAME
+
+classes/match.pmc - Match object for rules
+
+=head1 DESCRIPTION
+
+This is a match object for holding hypothetical variables, the input string,
+etc.
+
+For now, it is really just proof-of-concept code, and I fully expect
+anyone who reads this to hurl. Violently.
+
+=head2 Functions
+
+=over 4
+
+=cut
+
+*/
+
+#include <assert.h>
+#include "parrot/parrot.h"
+
+STRING * hash_get_idx(Interp *interpreter, Hash *hash, PMC *key);
+
+static STRING* make_hash_key(Interp* interpreter, PMC * key)
+{
+    if (key == NULL) {
+        internal_exception(OUT_OF_BOUNDS,
+        "Cannot use NULL key for Match!\n");
+        return NULL;
+    }
+    return key_string(interpreter, key);
+}
+
+static STRING* match_range(Interp* interp, PMC* self, PMC* range)
+{
+    STRING* input_key = const_string(interp, "!INPUT");
+    Hash* hash = (Hash*) PMC_struct_val(self);
+    HashBucket *b;
+    STRING* input;
+    int start, end;
+
+    b = hash_get_bucket(interp, hash, input_key);
+    if (!b) {
+        internal_exception(1, "Match: input string not set");
+        return NULL;
+    }
+
+    input = VTABLE_get_string(interp, (PMC*) b->value);
+    /* These could both be converted to grab UVal_int directly, but
+     * I'll leave it like this for now because it'll test the vtable
+     * members. */
+    start = VTABLE_get_integer_keyed_int(interp, range, 0);
+    end = VTABLE_get_integer_keyed_int(interp, range, 1);
+
+    if (start == -2 || end == -2 || end < start - 1)
+        return NULL;
+    else
+        return string_substr(interp, input, start, end - start + 1, NULL, 0);
+}
+
+static STRING* fetch_string(Interp* interp, PMC* matchobj, PMC* val)
+{
+    if (val->vtable->base_type == enum_class_MatchRange) {
+        return match_range(interp, matchobj, val);
+    } else {
+        return VTABLE_get_string(interp, val);
+    }
+}
+
+static INTVAL fetch_integer(Interp* interp, PMC* matchobj, PMC* val)
+{
+    if (val->vtable->base_type == enum_class_MatchRange) {
+        STRING* valstr = match_range(interp, matchobj, val);
+        return string_to_int(interp, valstr);
+    } else {
+        return VTABLE_get_integer(interp, val);
+    }
+}
+
+pmclass Match extends PerlHash {
+
+/*
+
+=item C<STRING *get_string_keyed_str(STRING *key)>
+
+=cut
+
+*/
+
+    STRING* get_string_keyed_str (STRING* key) {
+        PMC* value;
+        Hash* hash = (Hash*) PMC_struct_val(SELF);
+        HashBucket *b = hash_get_bucket(INTERP, hash, key);
+        if (b == NULL) {
+            /* XXX Warning: use of uninitialized value */
+            /* return VTABLE_get_string(INTERP, undef); */
+            return NULL;
+        }
+        return fetch_string(INTERP, SELF, (PMC*) b->value);
+    }
+
+/*
+
+=item C<STRING *get_string_keyed(PMC *key)>
+
+Returns the string value for the element at C<*key>.
+
+=cut
+
+*/
+
+    STRING* get_string_keyed (PMC* key) {
+        PMC* valpmc;
+        STRING* keystr;
+        HashBucket *b;
+        Hash *hash = PMC_struct_val(SELF);
+        PMC* nextkey;
+
+        switch (PObj_get_FLAGS(key) & KEY_type_FLAGS) {
+            case KEY_integer_FLAG:
+                /* called from iterator with an integer idx in key */
+                /* BUG! This will iterate through the input string as
+                 * well as all of the real values. */
+                if (hash->key_type == Hash_key_type_int) {
+                    INTVAL i = (INTVAL)hash_get_idx(INTERP, hash, key);
+                    return string_from_int(interpreter, i);
+                }
+                return hash_get_idx(INTERP, hash, key);
+            default:
+                keystr = make_hash_key(INTERP, key);
+        }
+        b = hash_get_bucket(INTERP, hash, keystr);
+        if (b == NULL) {
+            return SELF.get_string_keyed_str(keystr);
+        }
+        nextkey = key_next(INTERP, key);
+        valpmc = b->value;
+        if (!nextkey)
+            return fetch_string(INTERP, SELF, valpmc);
+        return VTABLE_get_string_keyed(INTERP, valpmc, nextkey);
+    }
+
+    INTVAL get_integer_keyed (PMC* key) {
+        PMC* valpmc;
+        STRING* keystr;
+        Hash *hash = PMC_struct_val(SELF);
+        HashBucket *b;
+        PMC* nextkey;
+
+        switch (PObj_get_FLAGS(key) & KEY_type_FLAGS) {
+            case KEY_integer_FLAG:
+                /* called from iterator with an integer idx in key
+                 * check if we really have Hash_key_type_int
+                 */
+                if (hash->key_type == Hash_key_type_int) {
+                    return (INTVAL)hash_get_idx(INTERP, hash, key);
+                }
+                else {
+                    STRING *s = hash_get_idx(INTERP, hash, key);
+                    return string_to_int(interpreter, s);
+                }
+            default:
+                keystr = make_hash_key(INTERP, key);
+        }
+        b = hash_get_bucket(INTERP, hash, keystr);
+        if (b == NULL) {
+            /* XXX Warning: use of uninitialized value */
+            return 0;
+        }
+        nextkey = key_next(INTERP, key);
+        valpmc = b->value;
+        if (!nextkey)
+            return fetch_integer(INTERP, SELF, valpmc);
+        return VTABLE_get_integer_keyed(INTERP, valpmc, nextkey);
+    }
+
+}
+
+/*
+
+=back
+
+=head1 SEE ALSO
+
+F<docs/pdds/pdd08_keys.pod>.
+
+=cut
+
+*/
+
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+*/
Index: classes/matchrange.pmc
===================================================================
RCS file: classes/matchrange.pmc
diff -N classes/matchrange.pmc
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ classes/matchrange.pmc      17 Aug 2004 17:02:01 -0000
@@ -0,0 +1,422 @@
+/*
+Copyright: 2004 The Perl Foundation.  All Rights Reserved.
+$Id: matchrange.pmc,v 1.8 2004/07/29 09:12:38 leo Exp $
+
+=head1 NAME
+
+classes/matchrange.pmc - MatchRange Numbers PMC Class
+
+=head1 DESCRIPTION
+
+C<MatchRange> provides a representation of regular expression matches,
+by describing the starting and ending offsets within the input string.
+This PMC only provides the start and end values; the Match PMC is
+responsible for interpreting these values appropriately (namely, as a
+substring within the input string.)
+
+This really probably shouldn't be a top-level class; it probably ought
+to inherit from "IntPair" or something like that. (But it can't just
+*be* "IntPair", because the Match PMC uses the type to figure out
+whether it has a regular variable or something that it needs to
+interpret as a string given its value and the input string.)
+
+=head2 Functions
+
+=over 4
+
+=cut
+
+*/
+
+#include "parrot/parrot.h"
+
+#define RANGE_START(pmc) UVal_int((pmc)->obj.u)
+#define RANGE_END(pmc) UVal_int2((pmc)->obj.u)
+
+/*
+
+=item C<static INTVAL*
+matchrange_locate_keyed_int(Parrot_Interp interp, PMC* self, STRING *key)>
+
+Interpret the string C<key>; valid keys are C<start> and C<end>,
+representing the offsets of the first and last characters of the
+matching range.
+
+=cut
+
+*/
+
+static INTVAL*
+matchrange_locate_keyed_int(Parrot_Interp interp, PMC* self, STRING *key)
+{
+    STRING *start = string_from_cstring(interp, "start", 5);
+    STRING *end;
+
+    if(0 == string_equal(interp, key, start))
+        return &RANGE_START(self);
+    end = string_from_cstring(interp, "end", 3);
+    if(0 == string_equal(interp, key, end))
+        return &RANGE_END(self);
+    internal_exception(KEY_NOT_FOUND,
+        "MatchRange: key is neither 'start' nor 'end'\n");
+    return NULL;
+}
+
+pmclass MatchRange {
+
+/*
+
+=back
+
+=head2 Methods
+
+=over 4
+
+=item C<void init()>
+
+Initializes the matchrange with the [-2,-2].
+
+=item C<void init_pmc (PMC* initializer)>
+
+Initializes the matchrange number with the specified values.
+(not implemented)
+
+=item C<void destroy ()>
+
+Cleans up.
+
+=item C<PMC* clone ()>
+
+Creates an identical copy of the matchrange number.
+
+=cut
+
+*/
+
+    void init () {
+        PObj_active_destroy_SET(SELF);
+        RANGE_START(SELF) = RANGE_END(SELF) = -2;
+    }
+
+    void init_pmc (PMC* initializer) {
+        /* XXX not implemented */
+        DYNSELF.init();
+    }
+
+    void destroy () {
+    }
+
+    void morph (INTVAL type) {
+        if (SELF->vtable->base_type == type)
+            return;
+        SUPER(type);
+    }
+
+    PMC* clone () {
+        PMC* dest = pmc_new_noinit(INTERP, SELF->vtable->base_type);
+        PObj_active_destroy_SET(dest);
+        RANGE_START(dest) = RANGE_START(SELF);
+        RANGE_END(dest) = RANGE_END(SELF);
+        return dest;
+    }
+
+/*
+
+=item C<INTVAL get_bool ()>
+
+Returns true if the match range is defined.
+
+=cut
+
+*/
+
+    INTVAL get_bool () {
+        return (INTVAL)(RANGE_START(SELF) != -2 && RANGE_END(SELF) != -2);
+    }
+
+/*
+
+=item C<INTVAL get_integer_keyed (PMC* key)>
+
+=item C<INTVAL get_integer_keyed_str (STRING* key)>
+
+=item C<FLOATVAL get_number_keyed (PMC* key)>
+
+=item C<FLOATVAL get_number_keyed_str (STRING* key)>
+
+=item C<PMC* get_pmc_keyed (PMC* key)>
+
+=item C<PMC* get_pmc_keyed_str (STRING* key)>
+
+Returns the requested number (real part for C<real> and imaginary for C<imag>).
+
+=cut
+
+*/
+
+    INTVAL get_integer_keyed (PMC* key) {
+        if (key_type(INTERP, key) == KEY_integer_FLAG) {
+            return SELF.get_integer_keyed_int(PMC_int_val(key));
+        } else {
+            STRING* s = VTABLE_get_string(INTERP, key);
+            return DYNSELF.get_integer_keyed_str(s);
+        }
+    }
+
+    INTVAL get_integer_keyed_str (STRING* key) {
+        return (INTVAL)(DYNSELF.get_number_keyed_str(key));
+    }
+
+    FLOATVAL get_number_keyed (PMC* key) {
+        STRING* s = VTABLE_get_string(INTERP, key);
+        return DYNSELF.get_number_keyed_str(s);
+    }
+
+    FLOATVAL get_number_keyed_str (STRING* key) {
+        INTVAL *num = matchrange_locate_keyed_int(INTERP, SELF, key);
+        if(num)
+            return *num;
+        return 0.0;
+    }
+
+    PMC* get_pmc_keyed (PMC* key) {
+        STRING* s = VTABLE_get_string(INTERP, key);
+        return DYNSELF.get_pmc_keyed_str(s);
+    }
+
+    PMC* get_pmc_keyed_str (STRING* key) {
+        PMC *ret;
+        FLOATVAL val;
+
+        ret = pmc_new(INTERP, enum_class_Float);
+        val = DYNSELF.get_number_keyed_str(key);
+        VTABLE_set_number_native(INTERP, ret, val);
+        return ret;
+    }
+
+/*
+
+=item C<INTVAL get_integer_keyed_int(INTVAL key)>
+
+Quick hack to emulate get_start() and get_end():
+
+  key = 0 ... get start offset
+  key = 1 ... get end offset
+
+=item C<void set_integer_keyed_int(INTVAL key, INTVAL v)>
+
+Set start or end depending on key
+
+*/
+
+    INTVAL get_integer_keyed_int(INTVAL key) {
+        switch (key) {
+            case 0:
+                return RANGE_START(SELF);
+            case 1:
+                return RANGE_END(SELF);
+            default:
+                internal_exception(1, "MatchRange: key must be 0 or 1");
+        }
+        return -2;
+    }
+
+    void set_integer_keyed_int(INTVAL key, INTVAL v) {
+        switch (key) {
+            case 0:
+                RANGE_START(SELF) = v;
+                break;
+            case 1:
+                RANGE_END(SELF) = v;
+                break;
+            default:
+                internal_exception(1, "MatchRange: key must be 0 or 1");
+        }
+    }
+/*
+
+=item C<void set_pmc (PMC* value)>
+
+if C<value> is a MatchRange PMC then the matchrange number is set to
+its value; otherwise throw an exception.
+
+=cut
+
+*/
+
+    void set_pmc (PMC* value) {
+        if(value->vtable->base_type == enum_class_MatchRange) {
+            RANGE_START(SELF) = RANGE_START(value);
+            RANGE_END(SELF) = RANGE_END(value);
+        }
+        else {
+            internal_exception(1, "MatchRange: cannot set from pmc");
+        }
+    }
+
+/*
+
+=item C<void set_integer_keyed (PMC* key, INTVAL value)>
+
+=item C<void set_integer_keyed_str (STRING* key, INTVAL value)>
+
+=item C<void set_number_keyed (PMC* key, FLOATVAL value)>
+
+=item C<void set_number_keyed_str (STRING* key, FLOATVAL value)>
+
+=item C<void set_number_keyed_int (INTVAL key, FLOATVAL value)>
+
+=item C<void set_pmc_keyed (PMC* key, PMC* value)>
+
+=item C<void set_pmc_keyed_str (STRING* key, PMC* value)>
+
+Sets the requested number (start offset for C<start> and end offset for
+C<end>) to C<value>. For the keyed_int variants, 0 means RANGE_START,
+1 means RANGE_END.
+
+=cut
+
+*/
+
+    void set_integer_keyed (PMC* key, INTVAL value) {
+        if (key_type(INTERP, key) == KEY_integer_FLAG) {
+            return SELF.set_integer_keyed_int(PMC_int_val(key), value);
+        } else {
+            STRING* s = VTABLE_get_string(INTERP, key);
+            DYNSELF.set_integer_keyed_str(s, value);
+        }
+    }
+
+    void set_integer_keyed_str (STRING* key, INTVAL value) {
+        INTVAL *num = matchrange_locate_keyed_int(INTERP, SELF, key);
+        if(num)
+            *num = value;
+    }
+
+    void set_number_keyed (PMC* key, FLOATVAL value) {
+        DYNSELF.set_integer_keyed(key, (INTVAL) value);
+    }
+
+    void set_number_keyed_str (STRING* key, FLOATVAL value) {
+        DYNSELF.set_integer_keyed_str(key, (INTVAL) value);
+    }
+
+    void set_number_keyed_int (INTVAL key, FLOATVAL value) {
+        DYNSELF.set_integer_keyed_int(key, (INTVAL) value);
+    }
+
+    void set_pmc_keyed (PMC* key, PMC* value) {
+        INTVAL f = VTABLE_get_integer(INTERP, value);
+        DYNSELF.set_integer_keyed(key, f);
+    }
+
+    void set_pmc_keyed_str (STRING* key, PMC* value) {
+        INTVAL f = VTABLE_get_integer(INTERP, value);
+        DYNSELF.set_integer_keyed_str(key, f);
+    }
+
+/*
+
+=item C<void add (PMC* value, PMC* dest)>
+
+=item C<void add_int (INTVAL value, PMC* dest)>
+
+=item C<void add_float (FLOATVAL value, PMC* dest)>
+
+Adds C<value> to the ending offset of the match range, placing the
+result in C<dest>.
+
+=cut
+
+*/
+
+    void add (PMC* value, PMC* dest) {
+        INTVAL v = VTABLE_get_integer(INTERP, dest);
+        SELF.add_int(v, dest);
+    }
+
+    void add_int (INTVAL value, PMC* dest) {
+        VTABLE_morph(INTERP, dest, enum_class_MatchRange);
+        if (RANGE_END(SELF) == -2) {
+            if (RANGE_START(SELF) == -2) {
+                internal_exception(1, "MatchRange: cannot add to nonexistent range");
+                return;
+            }
+            RANGE_END(SELF) = RANGE_START(SELF);
+        }
+        RANGE_END(dest) = RANGE_END(SELF) + value;
+    }
+
+    void add_float (FLOATVAL value, PMC* dest) {
+        SELF.add_int((INTVAL) value, dest);
+    }
+
+/*
+
+=item C<void subtract (PMC* value, PMC* dest)>
+
+=item C<void subtract_int (INTVAL value, PMC* dest)>
+
+=item C<void subtract_float (FLOATVAL value, PMC* dest)>
+
+Subtracts C<value> from the ending offset of the match range, placing
+the result in C<dest>.
+
+=cut
+
+*/
+
+    void subtract (PMC* value, PMC* dest) {
+        INTVAL v = VTABLE_get_integer(INTERP, dest);
+        SELF.subtract_int(v, dest);
+    }
+
+    void subtract_int (INTVAL value, PMC* dest) {
+        VTABLE_morph(INTERP, dest, enum_class_MatchRange);
+        RANGE_END(dest) = RANGE_END(SELF) - value;
+        if (RANGE_END(dest) <= -2 || RANGE_END(dest) < RANGE_START(dest) - 1)
+            RANGE_END(dest) = -2;
+    }
+
+    void subtract_float (FLOATVAL value, PMC* dest) {
+        SELF.subtract_int((INTVAL) value, dest);
+    }
+
+/*
+
+=item C<INTVAL is_equal (PMC* value)>
+
+Compares the matchrange number with C<value> and returs true if they
+are equal.
+
+=cut
+
+*/
+
+    INTVAL is_equal (PMC* value) {
+        if(value->vtable->base_type == enum_class_MatchRange)
+            return (INTVAL)(
+                    RANGE_START(SELF) == RANGE_START(value) &&
+                    RANGE_END(SELF) == RANGE_END(value)
+                );
+        return (INTVAL)0;
+    }
+}
+
+/*
+
+=back
+
+=cut
+
+*/
+
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+*/

Reply via email to