I needed to create a Match PMC object for holding the match groups
(parenthesized expressions and capturing rules) from a regex match.
Unfortunately, it works by using another new PMC type, the MatchRange
PMC, to signal that an element of its hashtable should be interpreted
specially (as a substring of the input string). One PMC knowing about
another currently means they need to be static PMCs, not dynamic.
(AFAIK) So this is the patch of what I am currently using. I cannot
guarantee it will actually be useful for any other regex implementors,
so I feel uncomfortable committing it myself. (OTOH, if someone needs
something different, they can just add it as a different name.) The
point is, this is something I need for my stuff and the future of
languages/regex is with some version of it, so I can't commit those
changes without this. Although I fully expect the Match PMC will need
to be substantially beefed up to become a full grammar object (or
something...), this is base functionality that it needs to start with.
With these two PMCs, I can construct a match object containing the
hypotheticals $1, $2, etc., as well as a full parse tree comprised of
nested match objects. This does *not* handle saving and restoring
previous hypothetical values, as is needed in the case of
(a)+b
In my compiler, that is handled by the compiled engine code.
Index: classes/match.pmc
===================================================================
RCS file: classes/match.pmc
diff -N classes/match.pmc
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ classes/match.pmc 17 Aug 2004 17:02:01 -0000
@@ -0,0 +1,205 @@
+/*
+Copyright: 2004 The Perl Foundation. All Rights Reserved.
+$Id$
+
+=head1 NAME
+
+classes/match.pmc - Match object for rules
+
+=head1 DESCRIPTION
+
+This is a match object for holding hypothetical variables, the input string,
+etc.
+
+For now, it is really just proof-of-concept code, and I fully expect
+anyone who reads this to hurl. Violently.
+
+=head2 Functions
+
+=over 4
+
+=cut
+
+*/
+
+#include <assert.h>
+#include "parrot/parrot.h"
+
+STRING * hash_get_idx(Interp *interpreter, Hash *hash, PMC *key);
+
+static STRING* make_hash_key(Interp* interpreter, PMC * key)
+{
+ if (key == NULL) {
+ internal_exception(OUT_OF_BOUNDS,
+ "Cannot use NULL key for Match!\n");
+ return NULL;
+ }
+ return key_string(interpreter, key);
+}
+
+static STRING* match_range(Interp* interp, PMC* self, PMC* range)
+{
+ STRING* input_key = const_string(interp, "!INPUT");
+ Hash* hash = (Hash*) PMC_struct_val(self);
+ HashBucket *b;
+ STRING* input;
+ int start, end;
+
+ b = hash_get_bucket(interp, hash, input_key);
+ if (!b) {
+ internal_exception(1, "Match: input string not set");
+ return NULL;
+ }
+
+ input = VTABLE_get_string(interp, (PMC*) b->value);
+ /* These could both be converted to grab UVal_int directly, but
+ * I'll leave it like this for now because it'll test the vtable
+ * members. */
+ start = VTABLE_get_integer_keyed_int(interp, range, 0);
+ end = VTABLE_get_integer_keyed_int(interp, range, 1);
+
+ if (start == -2 || end == -2 || end < start - 1)
+ return NULL;
+ else
+ return string_substr(interp, input, start, end - start + 1, NULL, 0);
+}
+
+static STRING* fetch_string(Interp* interp, PMC* matchobj, PMC* val)
+{
+ if (val->vtable->base_type == enum_class_MatchRange) {
+ return match_range(interp, matchobj, val);
+ } else {
+ return VTABLE_get_string(interp, val);
+ }
+}
+
+static INTVAL fetch_integer(Interp* interp, PMC* matchobj, PMC* val)
+{
+ if (val->vtable->base_type == enum_class_MatchRange) {
+ STRING* valstr = match_range(interp, matchobj, val);
+ return string_to_int(interp, valstr);
+ } else {
+ return VTABLE_get_integer(interp, val);
+ }
+}
+
+pmclass Match extends PerlHash {
+
+/*
+
+=item C<STRING *get_string_keyed_str(STRING *key)>
+
+=cut
+
+*/
+
+ STRING* get_string_keyed_str (STRING* key) {
+ PMC* value;
+ Hash* hash = (Hash*) PMC_struct_val(SELF);
+ HashBucket *b = hash_get_bucket(INTERP, hash, key);
+ if (b == NULL) {
+ /* XXX Warning: use of uninitialized value */
+ /* return VTABLE_get_string(INTERP, undef); */
+ return NULL;
+ }
+ return fetch_string(INTERP, SELF, (PMC*) b->value);
+ }
+
+/*
+
+=item C<STRING *get_string_keyed(PMC *key)>
+
+Returns the string value for the element at C<*key>.
+
+=cut
+
+*/
+
+ STRING* get_string_keyed (PMC* key) {
+ PMC* valpmc;
+ STRING* keystr;
+ HashBucket *b;
+ Hash *hash = PMC_struct_val(SELF);
+ PMC* nextkey;
+
+ switch (PObj_get_FLAGS(key) & KEY_type_FLAGS) {
+ case KEY_integer_FLAG:
+ /* called from iterator with an integer idx in key */
+ /* BUG! This will iterate through the input string as
+ * well as all of the real values. */
+ if (hash->key_type == Hash_key_type_int) {
+ INTVAL i = (INTVAL)hash_get_idx(INTERP, hash, key);
+ return string_from_int(interpreter, i);
+ }
+ return hash_get_idx(INTERP, hash, key);
+ default:
+ keystr = make_hash_key(INTERP, key);
+ }
+ b = hash_get_bucket(INTERP, hash, keystr);
+ if (b == NULL) {
+ return SELF.get_string_keyed_str(keystr);
+ }
+ nextkey = key_next(INTERP, key);
+ valpmc = b->value;
+ if (!nextkey)
+ return fetch_string(INTERP, SELF, valpmc);
+ return VTABLE_get_string_keyed(INTERP, valpmc, nextkey);
+ }
+
+ INTVAL get_integer_keyed (PMC* key) {
+ PMC* valpmc;
+ STRING* keystr;
+ Hash *hash = PMC_struct_val(SELF);
+ HashBucket *b;
+ PMC* nextkey;
+
+ switch (PObj_get_FLAGS(key) & KEY_type_FLAGS) {
+ case KEY_integer_FLAG:
+ /* called from iterator with an integer idx in key
+ * check if we really have Hash_key_type_int
+ */
+ if (hash->key_type == Hash_key_type_int) {
+ return (INTVAL)hash_get_idx(INTERP, hash, key);
+ }
+ else {
+ STRING *s = hash_get_idx(INTERP, hash, key);
+ return string_to_int(interpreter, s);
+ }
+ default:
+ keystr = make_hash_key(INTERP, key);
+ }
+ b = hash_get_bucket(INTERP, hash, keystr);
+ if (b == NULL) {
+ /* XXX Warning: use of uninitialized value */
+ return 0;
+ }
+ nextkey = key_next(INTERP, key);
+ valpmc = b->value;
+ if (!nextkey)
+ return fetch_integer(INTERP, SELF, valpmc);
+ return VTABLE_get_integer_keyed(INTERP, valpmc, nextkey);
+ }
+
+}
+
+/*
+
+=back
+
+=head1 SEE ALSO
+
+F<docs/pdds/pdd08_keys.pod>.
+
+=cut
+
+*/
+
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+*/
Index: classes/matchrange.pmc
===================================================================
RCS file: classes/matchrange.pmc
diff -N classes/matchrange.pmc
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ classes/matchrange.pmc 17 Aug 2004 17:02:01 -0000
@@ -0,0 +1,422 @@
+/*
+Copyright: 2004 The Perl Foundation. All Rights Reserved.
+$Id: matchrange.pmc,v 1.8 2004/07/29 09:12:38 leo Exp $
+
+=head1 NAME
+
+classes/matchrange.pmc - MatchRange Numbers PMC Class
+
+=head1 DESCRIPTION
+
+C<MatchRange> provides a representation of regular expression matches,
+by describing the starting and ending offsets within the input string.
+This PMC only provides the start and end values; the Match PMC is
+responsible for interpreting these values appropriately (namely, as a
+substring within the input string.)
+
+This really probably shouldn't be a top-level class; it probably ought
+to inherit from "IntPair" or something like that. (But it can't just
+*be* "IntPair", because the Match PMC uses the type to figure out
+whether it has a regular variable or something that it needs to
+interpret as a string given its value and the input string.)
+
+=head2 Functions
+
+=over 4
+
+=cut
+
+*/
+
+#include "parrot/parrot.h"
+
+#define RANGE_START(pmc) UVal_int((pmc)->obj.u)
+#define RANGE_END(pmc) UVal_int2((pmc)->obj.u)
+
+/*
+
+=item C<static INTVAL*
+matchrange_locate_keyed_int(Parrot_Interp interp, PMC* self, STRING *key)>
+
+Interpret the string C<key>; valid keys are C<start> and C<end>,
+representing the offsets of the first and last characters of the
+matching range.
+
+=cut
+
+*/
+
+static INTVAL*
+matchrange_locate_keyed_int(Parrot_Interp interp, PMC* self, STRING *key)
+{
+ STRING *start = string_from_cstring(interp, "start", 5);
+ STRING *end;
+
+ if(0 == string_equal(interp, key, start))
+ return &RANGE_START(self);
+ end = string_from_cstring(interp, "end", 3);
+ if(0 == string_equal(interp, key, end))
+ return &RANGE_END(self);
+ internal_exception(KEY_NOT_FOUND,
+ "MatchRange: key is neither 'start' nor 'end'\n");
+ return NULL;
+}
+
+pmclass MatchRange {
+
+/*
+
+=back
+
+=head2 Methods
+
+=over 4
+
+=item C<void init()>
+
+Initializes the matchrange with the [-2,-2].
+
+=item C<void init_pmc (PMC* initializer)>
+
+Initializes the matchrange number with the specified values.
+(not implemented)
+
+=item C<void destroy ()>
+
+Cleans up.
+
+=item C<PMC* clone ()>
+
+Creates an identical copy of the matchrange number.
+
+=cut
+
+*/
+
+ void init () {
+ PObj_active_destroy_SET(SELF);
+ RANGE_START(SELF) = RANGE_END(SELF) = -2;
+ }
+
+ void init_pmc (PMC* initializer) {
+ /* XXX not implemented */
+ DYNSELF.init();
+ }
+
+ void destroy () {
+ }
+
+ void morph (INTVAL type) {
+ if (SELF->vtable->base_type == type)
+ return;
+ SUPER(type);
+ }
+
+ PMC* clone () {
+ PMC* dest = pmc_new_noinit(INTERP, SELF->vtable->base_type);
+ PObj_active_destroy_SET(dest);
+ RANGE_START(dest) = RANGE_START(SELF);
+ RANGE_END(dest) = RANGE_END(SELF);
+ return dest;
+ }
+
+/*
+
+=item C<INTVAL get_bool ()>
+
+Returns true if the match range is defined.
+
+=cut
+
+*/
+
+ INTVAL get_bool () {
+ return (INTVAL)(RANGE_START(SELF) != -2 && RANGE_END(SELF) != -2);
+ }
+
+/*
+
+=item C<INTVAL get_integer_keyed (PMC* key)>
+
+=item C<INTVAL get_integer_keyed_str (STRING* key)>
+
+=item C<FLOATVAL get_number_keyed (PMC* key)>
+
+=item C<FLOATVAL get_number_keyed_str (STRING* key)>
+
+=item C<PMC* get_pmc_keyed (PMC* key)>
+
+=item C<PMC* get_pmc_keyed_str (STRING* key)>
+
+Returns the requested number (real part for C<real> and imaginary for C<imag>).
+
+=cut
+
+*/
+
+ INTVAL get_integer_keyed (PMC* key) {
+ if (key_type(INTERP, key) == KEY_integer_FLAG) {
+ return SELF.get_integer_keyed_int(PMC_int_val(key));
+ } else {
+ STRING* s = VTABLE_get_string(INTERP, key);
+ return DYNSELF.get_integer_keyed_str(s);
+ }
+ }
+
+ INTVAL get_integer_keyed_str (STRING* key) {
+ return (INTVAL)(DYNSELF.get_number_keyed_str(key));
+ }
+
+ FLOATVAL get_number_keyed (PMC* key) {
+ STRING* s = VTABLE_get_string(INTERP, key);
+ return DYNSELF.get_number_keyed_str(s);
+ }
+
+ FLOATVAL get_number_keyed_str (STRING* key) {
+ INTVAL *num = matchrange_locate_keyed_int(INTERP, SELF, key);
+ if(num)
+ return *num;
+ return 0.0;
+ }
+
+ PMC* get_pmc_keyed (PMC* key) {
+ STRING* s = VTABLE_get_string(INTERP, key);
+ return DYNSELF.get_pmc_keyed_str(s);
+ }
+
+ PMC* get_pmc_keyed_str (STRING* key) {
+ PMC *ret;
+ FLOATVAL val;
+
+ ret = pmc_new(INTERP, enum_class_Float);
+ val = DYNSELF.get_number_keyed_str(key);
+ VTABLE_set_number_native(INTERP, ret, val);
+ return ret;
+ }
+
+/*
+
+=item C<INTVAL get_integer_keyed_int(INTVAL key)>
+
+Quick hack to emulate get_start() and get_end():
+
+ key = 0 ... get start offset
+ key = 1 ... get end offset
+
+=item C<void set_integer_keyed_int(INTVAL key, INTVAL v)>
+
+Set start or end depending on key
+
+*/
+
+ INTVAL get_integer_keyed_int(INTVAL key) {
+ switch (key) {
+ case 0:
+ return RANGE_START(SELF);
+ case 1:
+ return RANGE_END(SELF);
+ default:
+ internal_exception(1, "MatchRange: key must be 0 or 1");
+ }
+ return -2;
+ }
+
+ void set_integer_keyed_int(INTVAL key, INTVAL v) {
+ switch (key) {
+ case 0:
+ RANGE_START(SELF) = v;
+ break;
+ case 1:
+ RANGE_END(SELF) = v;
+ break;
+ default:
+ internal_exception(1, "MatchRange: key must be 0 or 1");
+ }
+ }
+/*
+
+=item C<void set_pmc (PMC* value)>
+
+if C<value> is a MatchRange PMC then the matchrange number is set to
+its value; otherwise throw an exception.
+
+=cut
+
+*/
+
+ void set_pmc (PMC* value) {
+ if(value->vtable->base_type == enum_class_MatchRange) {
+ RANGE_START(SELF) = RANGE_START(value);
+ RANGE_END(SELF) = RANGE_END(value);
+ }
+ else {
+ internal_exception(1, "MatchRange: cannot set from pmc");
+ }
+ }
+
+/*
+
+=item C<void set_integer_keyed (PMC* key, INTVAL value)>
+
+=item C<void set_integer_keyed_str (STRING* key, INTVAL value)>
+
+=item C<void set_number_keyed (PMC* key, FLOATVAL value)>
+
+=item C<void set_number_keyed_str (STRING* key, FLOATVAL value)>
+
+=item C<void set_number_keyed_int (INTVAL key, FLOATVAL value)>
+
+=item C<void set_pmc_keyed (PMC* key, PMC* value)>
+
+=item C<void set_pmc_keyed_str (STRING* key, PMC* value)>
+
+Sets the requested number (start offset for C<start> and end offset for
+C<end>) to C<value>. For the keyed_int variants, 0 means RANGE_START,
+1 means RANGE_END.
+
+=cut
+
+*/
+
+ void set_integer_keyed (PMC* key, INTVAL value) {
+ if (key_type(INTERP, key) == KEY_integer_FLAG) {
+ return SELF.set_integer_keyed_int(PMC_int_val(key), value);
+ } else {
+ STRING* s = VTABLE_get_string(INTERP, key);
+ DYNSELF.set_integer_keyed_str(s, value);
+ }
+ }
+
+ void set_integer_keyed_str (STRING* key, INTVAL value) {
+ INTVAL *num = matchrange_locate_keyed_int(INTERP, SELF, key);
+ if(num)
+ *num = value;
+ }
+
+ void set_number_keyed (PMC* key, FLOATVAL value) {
+ DYNSELF.set_integer_keyed(key, (INTVAL) value);
+ }
+
+ void set_number_keyed_str (STRING* key, FLOATVAL value) {
+ DYNSELF.set_integer_keyed_str(key, (INTVAL) value);
+ }
+
+ void set_number_keyed_int (INTVAL key, FLOATVAL value) {
+ DYNSELF.set_integer_keyed_int(key, (INTVAL) value);
+ }
+
+ void set_pmc_keyed (PMC* key, PMC* value) {
+ INTVAL f = VTABLE_get_integer(INTERP, value);
+ DYNSELF.set_integer_keyed(key, f);
+ }
+
+ void set_pmc_keyed_str (STRING* key, PMC* value) {
+ INTVAL f = VTABLE_get_integer(INTERP, value);
+ DYNSELF.set_integer_keyed_str(key, f);
+ }
+
+/*
+
+=item C<void add (PMC* value, PMC* dest)>
+
+=item C<void add_int (INTVAL value, PMC* dest)>
+
+=item C<void add_float (FLOATVAL value, PMC* dest)>
+
+Adds C<value> to the ending offset of the match range, placing the
+result in C<dest>.
+
+=cut
+
+*/
+
+ void add (PMC* value, PMC* dest) {
+ INTVAL v = VTABLE_get_integer(INTERP, dest);
+ SELF.add_int(v, dest);
+ }
+
+ void add_int (INTVAL value, PMC* dest) {
+ VTABLE_morph(INTERP, dest, enum_class_MatchRange);
+ if (RANGE_END(SELF) == -2) {
+ if (RANGE_START(SELF) == -2) {
+ internal_exception(1, "MatchRange: cannot add to nonexistent range");
+ return;
+ }
+ RANGE_END(SELF) = RANGE_START(SELF);
+ }
+ RANGE_END(dest) = RANGE_END(SELF) + value;
+ }
+
+ void add_float (FLOATVAL value, PMC* dest) {
+ SELF.add_int((INTVAL) value, dest);
+ }
+
+/*
+
+=item C<void subtract (PMC* value, PMC* dest)>
+
+=item C<void subtract_int (INTVAL value, PMC* dest)>
+
+=item C<void subtract_float (FLOATVAL value, PMC* dest)>
+
+Subtracts C<value> from the ending offset of the match range, placing
+the result in C<dest>.
+
+=cut
+
+*/
+
+ void subtract (PMC* value, PMC* dest) {
+ INTVAL v = VTABLE_get_integer(INTERP, dest);
+ SELF.subtract_int(v, dest);
+ }
+
+ void subtract_int (INTVAL value, PMC* dest) {
+ VTABLE_morph(INTERP, dest, enum_class_MatchRange);
+ RANGE_END(dest) = RANGE_END(SELF) - value;
+ if (RANGE_END(dest) <= -2 || RANGE_END(dest) < RANGE_START(dest) - 1)
+ RANGE_END(dest) = -2;
+ }
+
+ void subtract_float (FLOATVAL value, PMC* dest) {
+ SELF.subtract_int((INTVAL) value, dest);
+ }
+
+/*
+
+=item C<INTVAL is_equal (PMC* value)>
+
+Compares the matchrange number with C<value> and returs true if they
+are equal.
+
+=cut
+
+*/
+
+ INTVAL is_equal (PMC* value) {
+ if(value->vtable->base_type == enum_class_MatchRange)
+ return (INTVAL)(
+ RANGE_START(SELF) == RANGE_START(value) &&
+ RANGE_END(SELF) == RANGE_END(value)
+ );
+ return (INTVAL)0;
+ }
+}
+
+/*
+
+=back
+
+=cut
+
+*/
+
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+*/