I needed to create a Match PMC object for holding the match groups (parenthesized expressions and capturing rules) from a regex match. Unfortunately, it works by using another new PMC type, the MatchRange PMC, to signal that an element of its hashtable should be interpreted specially (as a substring of the input string). One PMC knowing about another currently means they need to be static PMCs, not dynamic. (AFAIK) So this is the patch of what I am currently using. I cannot guarantee it will actually be useful for any other regex implementors, so I feel uncomfortable committing it myself. (OTOH, if someone needs something different, they can just add it as a different name.) The point is, this is something I need for my stuff and the future of languages/regex is with some version of it, so I can't commit those changes without this. Although I fully expect the Match PMC will need to be substantially beefed up to become a full grammar object (or something...), this is base functionality that it needs to start with.
With these two PMCs, I can construct a match object containing the hypotheticals $1, $2, etc., as well as a full parse tree comprised of nested match objects. This does *not* handle saving and restoring previous hypothetical values, as is needed in the case of (a)+b In my compiler, that is handled by the compiled engine code.
Index: classes/match.pmc =================================================================== RCS file: classes/match.pmc diff -N classes/match.pmc --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ classes/match.pmc 17 Aug 2004 17:02:01 -0000 @@ -0,0 +1,205 @@ +/* +Copyright: 2004 The Perl Foundation. All Rights Reserved. +$Id$ + +=head1 NAME + +classes/match.pmc - Match object for rules + +=head1 DESCRIPTION + +This is a match object for holding hypothetical variables, the input string, +etc. + +For now, it is really just proof-of-concept code, and I fully expect +anyone who reads this to hurl. Violently. + +=head2 Functions + +=over 4 + +=cut + +*/ + +#include <assert.h> +#include "parrot/parrot.h" + +STRING * hash_get_idx(Interp *interpreter, Hash *hash, PMC *key); + +static STRING* make_hash_key(Interp* interpreter, PMC * key) +{ + if (key == NULL) { + internal_exception(OUT_OF_BOUNDS, + "Cannot use NULL key for Match!\n"); + return NULL; + } + return key_string(interpreter, key); +} + +static STRING* match_range(Interp* interp, PMC* self, PMC* range) +{ + STRING* input_key = const_string(interp, "!INPUT"); + Hash* hash = (Hash*) PMC_struct_val(self); + HashBucket *b; + STRING* input; + int start, end; + + b = hash_get_bucket(interp, hash, input_key); + if (!b) { + internal_exception(1, "Match: input string not set"); + return NULL; + } + + input = VTABLE_get_string(interp, (PMC*) b->value); + /* These could both be converted to grab UVal_int directly, but + * I'll leave it like this for now because it'll test the vtable + * members. */ + start = VTABLE_get_integer_keyed_int(interp, range, 0); + end = VTABLE_get_integer_keyed_int(interp, range, 1); + + if (start == -2 || end == -2 || end < start - 1) + return NULL; + else + return string_substr(interp, input, start, end - start + 1, NULL, 0); +} + +static STRING* fetch_string(Interp* interp, PMC* matchobj, PMC* val) +{ + if (val->vtable->base_type == enum_class_MatchRange) { + return match_range(interp, matchobj, val); + } else { + return VTABLE_get_string(interp, val); + } +} + +static INTVAL fetch_integer(Interp* interp, PMC* matchobj, PMC* val) +{ + if (val->vtable->base_type == enum_class_MatchRange) { + STRING* valstr = match_range(interp, matchobj, val); + return string_to_int(interp, valstr); + } else { + return VTABLE_get_integer(interp, val); + } +} + +pmclass Match extends PerlHash { + +/* + +=item C<STRING *get_string_keyed_str(STRING *key)> + +=cut + +*/ + + STRING* get_string_keyed_str (STRING* key) { + PMC* value; + Hash* hash = (Hash*) PMC_struct_val(SELF); + HashBucket *b = hash_get_bucket(INTERP, hash, key); + if (b == NULL) { + /* XXX Warning: use of uninitialized value */ + /* return VTABLE_get_string(INTERP, undef); */ + return NULL; + } + return fetch_string(INTERP, SELF, (PMC*) b->value); + } + +/* + +=item C<STRING *get_string_keyed(PMC *key)> + +Returns the string value for the element at C<*key>. + +=cut + +*/ + + STRING* get_string_keyed (PMC* key) { + PMC* valpmc; + STRING* keystr; + HashBucket *b; + Hash *hash = PMC_struct_val(SELF); + PMC* nextkey; + + switch (PObj_get_FLAGS(key) & KEY_type_FLAGS) { + case KEY_integer_FLAG: + /* called from iterator with an integer idx in key */ + /* BUG! This will iterate through the input string as + * well as all of the real values. */ + if (hash->key_type == Hash_key_type_int) { + INTVAL i = (INTVAL)hash_get_idx(INTERP, hash, key); + return string_from_int(interpreter, i); + } + return hash_get_idx(INTERP, hash, key); + default: + keystr = make_hash_key(INTERP, key); + } + b = hash_get_bucket(INTERP, hash, keystr); + if (b == NULL) { + return SELF.get_string_keyed_str(keystr); + } + nextkey = key_next(INTERP, key); + valpmc = b->value; + if (!nextkey) + return fetch_string(INTERP, SELF, valpmc); + return VTABLE_get_string_keyed(INTERP, valpmc, nextkey); + } + + INTVAL get_integer_keyed (PMC* key) { + PMC* valpmc; + STRING* keystr; + Hash *hash = PMC_struct_val(SELF); + HashBucket *b; + PMC* nextkey; + + switch (PObj_get_FLAGS(key) & KEY_type_FLAGS) { + case KEY_integer_FLAG: + /* called from iterator with an integer idx in key + * check if we really have Hash_key_type_int + */ + if (hash->key_type == Hash_key_type_int) { + return (INTVAL)hash_get_idx(INTERP, hash, key); + } + else { + STRING *s = hash_get_idx(INTERP, hash, key); + return string_to_int(interpreter, s); + } + default: + keystr = make_hash_key(INTERP, key); + } + b = hash_get_bucket(INTERP, hash, keystr); + if (b == NULL) { + /* XXX Warning: use of uninitialized value */ + return 0; + } + nextkey = key_next(INTERP, key); + valpmc = b->value; + if (!nextkey) + return fetch_integer(INTERP, SELF, valpmc); + return VTABLE_get_integer_keyed(INTERP, valpmc, nextkey); + } + +} + +/* + +=back + +=head1 SEE ALSO + +F<docs/pdds/pdd08_keys.pod>. + +=cut + +*/ + +/* + * Local variables: + * c-indentation-style: bsd + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * + * vim: expandtab shiftwidth=4: +*/ Index: classes/matchrange.pmc =================================================================== RCS file: classes/matchrange.pmc diff -N classes/matchrange.pmc --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ classes/matchrange.pmc 17 Aug 2004 17:02:01 -0000 @@ -0,0 +1,422 @@ +/* +Copyright: 2004 The Perl Foundation. All Rights Reserved. +$Id: matchrange.pmc,v 1.8 2004/07/29 09:12:38 leo Exp $ + +=head1 NAME + +classes/matchrange.pmc - MatchRange Numbers PMC Class + +=head1 DESCRIPTION + +C<MatchRange> provides a representation of regular expression matches, +by describing the starting and ending offsets within the input string. +This PMC only provides the start and end values; the Match PMC is +responsible for interpreting these values appropriately (namely, as a +substring within the input string.) + +This really probably shouldn't be a top-level class; it probably ought +to inherit from "IntPair" or something like that. (But it can't just +*be* "IntPair", because the Match PMC uses the type to figure out +whether it has a regular variable or something that it needs to +interpret as a string given its value and the input string.) + +=head2 Functions + +=over 4 + +=cut + +*/ + +#include "parrot/parrot.h" + +#define RANGE_START(pmc) UVal_int((pmc)->obj.u) +#define RANGE_END(pmc) UVal_int2((pmc)->obj.u) + +/* + +=item C<static INTVAL* +matchrange_locate_keyed_int(Parrot_Interp interp, PMC* self, STRING *key)> + +Interpret the string C<key>; valid keys are C<start> and C<end>, +representing the offsets of the first and last characters of the +matching range. + +=cut + +*/ + +static INTVAL* +matchrange_locate_keyed_int(Parrot_Interp interp, PMC* self, STRING *key) +{ + STRING *start = string_from_cstring(interp, "start", 5); + STRING *end; + + if(0 == string_equal(interp, key, start)) + return &RANGE_START(self); + end = string_from_cstring(interp, "end", 3); + if(0 == string_equal(interp, key, end)) + return &RANGE_END(self); + internal_exception(KEY_NOT_FOUND, + "MatchRange: key is neither 'start' nor 'end'\n"); + return NULL; +} + +pmclass MatchRange { + +/* + +=back + +=head2 Methods + +=over 4 + +=item C<void init()> + +Initializes the matchrange with the [-2,-2]. + +=item C<void init_pmc (PMC* initializer)> + +Initializes the matchrange number with the specified values. +(not implemented) + +=item C<void destroy ()> + +Cleans up. + +=item C<PMC* clone ()> + +Creates an identical copy of the matchrange number. + +=cut + +*/ + + void init () { + PObj_active_destroy_SET(SELF); + RANGE_START(SELF) = RANGE_END(SELF) = -2; + } + + void init_pmc (PMC* initializer) { + /* XXX not implemented */ + DYNSELF.init(); + } + + void destroy () { + } + + void morph (INTVAL type) { + if (SELF->vtable->base_type == type) + return; + SUPER(type); + } + + PMC* clone () { + PMC* dest = pmc_new_noinit(INTERP, SELF->vtable->base_type); + PObj_active_destroy_SET(dest); + RANGE_START(dest) = RANGE_START(SELF); + RANGE_END(dest) = RANGE_END(SELF); + return dest; + } + +/* + +=item C<INTVAL get_bool ()> + +Returns true if the match range is defined. + +=cut + +*/ + + INTVAL get_bool () { + return (INTVAL)(RANGE_START(SELF) != -2 && RANGE_END(SELF) != -2); + } + +/* + +=item C<INTVAL get_integer_keyed (PMC* key)> + +=item C<INTVAL get_integer_keyed_str (STRING* key)> + +=item C<FLOATVAL get_number_keyed (PMC* key)> + +=item C<FLOATVAL get_number_keyed_str (STRING* key)> + +=item C<PMC* get_pmc_keyed (PMC* key)> + +=item C<PMC* get_pmc_keyed_str (STRING* key)> + +Returns the requested number (real part for C<real> and imaginary for C<imag>). + +=cut + +*/ + + INTVAL get_integer_keyed (PMC* key) { + if (key_type(INTERP, key) == KEY_integer_FLAG) { + return SELF.get_integer_keyed_int(PMC_int_val(key)); + } else { + STRING* s = VTABLE_get_string(INTERP, key); + return DYNSELF.get_integer_keyed_str(s); + } + } + + INTVAL get_integer_keyed_str (STRING* key) { + return (INTVAL)(DYNSELF.get_number_keyed_str(key)); + } + + FLOATVAL get_number_keyed (PMC* key) { + STRING* s = VTABLE_get_string(INTERP, key); + return DYNSELF.get_number_keyed_str(s); + } + + FLOATVAL get_number_keyed_str (STRING* key) { + INTVAL *num = matchrange_locate_keyed_int(INTERP, SELF, key); + if(num) + return *num; + return 0.0; + } + + PMC* get_pmc_keyed (PMC* key) { + STRING* s = VTABLE_get_string(INTERP, key); + return DYNSELF.get_pmc_keyed_str(s); + } + + PMC* get_pmc_keyed_str (STRING* key) { + PMC *ret; + FLOATVAL val; + + ret = pmc_new(INTERP, enum_class_Float); + val = DYNSELF.get_number_keyed_str(key); + VTABLE_set_number_native(INTERP, ret, val); + return ret; + } + +/* + +=item C<INTVAL get_integer_keyed_int(INTVAL key)> + +Quick hack to emulate get_start() and get_end(): + + key = 0 ... get start offset + key = 1 ... get end offset + +=item C<void set_integer_keyed_int(INTVAL key, INTVAL v)> + +Set start or end depending on key + +*/ + + INTVAL get_integer_keyed_int(INTVAL key) { + switch (key) { + case 0: + return RANGE_START(SELF); + case 1: + return RANGE_END(SELF); + default: + internal_exception(1, "MatchRange: key must be 0 or 1"); + } + return -2; + } + + void set_integer_keyed_int(INTVAL key, INTVAL v) { + switch (key) { + case 0: + RANGE_START(SELF) = v; + break; + case 1: + RANGE_END(SELF) = v; + break; + default: + internal_exception(1, "MatchRange: key must be 0 or 1"); + } + } +/* + +=item C<void set_pmc (PMC* value)> + +if C<value> is a MatchRange PMC then the matchrange number is set to +its value; otherwise throw an exception. + +=cut + +*/ + + void set_pmc (PMC* value) { + if(value->vtable->base_type == enum_class_MatchRange) { + RANGE_START(SELF) = RANGE_START(value); + RANGE_END(SELF) = RANGE_END(value); + } + else { + internal_exception(1, "MatchRange: cannot set from pmc"); + } + } + +/* + +=item C<void set_integer_keyed (PMC* key, INTVAL value)> + +=item C<void set_integer_keyed_str (STRING* key, INTVAL value)> + +=item C<void set_number_keyed (PMC* key, FLOATVAL value)> + +=item C<void set_number_keyed_str (STRING* key, FLOATVAL value)> + +=item C<void set_number_keyed_int (INTVAL key, FLOATVAL value)> + +=item C<void set_pmc_keyed (PMC* key, PMC* value)> + +=item C<void set_pmc_keyed_str (STRING* key, PMC* value)> + +Sets the requested number (start offset for C<start> and end offset for +C<end>) to C<value>. For the keyed_int variants, 0 means RANGE_START, +1 means RANGE_END. + +=cut + +*/ + + void set_integer_keyed (PMC* key, INTVAL value) { + if (key_type(INTERP, key) == KEY_integer_FLAG) { + return SELF.set_integer_keyed_int(PMC_int_val(key), value); + } else { + STRING* s = VTABLE_get_string(INTERP, key); + DYNSELF.set_integer_keyed_str(s, value); + } + } + + void set_integer_keyed_str (STRING* key, INTVAL value) { + INTVAL *num = matchrange_locate_keyed_int(INTERP, SELF, key); + if(num) + *num = value; + } + + void set_number_keyed (PMC* key, FLOATVAL value) { + DYNSELF.set_integer_keyed(key, (INTVAL) value); + } + + void set_number_keyed_str (STRING* key, FLOATVAL value) { + DYNSELF.set_integer_keyed_str(key, (INTVAL) value); + } + + void set_number_keyed_int (INTVAL key, FLOATVAL value) { + DYNSELF.set_integer_keyed_int(key, (INTVAL) value); + } + + void set_pmc_keyed (PMC* key, PMC* value) { + INTVAL f = VTABLE_get_integer(INTERP, value); + DYNSELF.set_integer_keyed(key, f); + } + + void set_pmc_keyed_str (STRING* key, PMC* value) { + INTVAL f = VTABLE_get_integer(INTERP, value); + DYNSELF.set_integer_keyed_str(key, f); + } + +/* + +=item C<void add (PMC* value, PMC* dest)> + +=item C<void add_int (INTVAL value, PMC* dest)> + +=item C<void add_float (FLOATVAL value, PMC* dest)> + +Adds C<value> to the ending offset of the match range, placing the +result in C<dest>. + +=cut + +*/ + + void add (PMC* value, PMC* dest) { + INTVAL v = VTABLE_get_integer(INTERP, dest); + SELF.add_int(v, dest); + } + + void add_int (INTVAL value, PMC* dest) { + VTABLE_morph(INTERP, dest, enum_class_MatchRange); + if (RANGE_END(SELF) == -2) { + if (RANGE_START(SELF) == -2) { + internal_exception(1, "MatchRange: cannot add to nonexistent range"); + return; + } + RANGE_END(SELF) = RANGE_START(SELF); + } + RANGE_END(dest) = RANGE_END(SELF) + value; + } + + void add_float (FLOATVAL value, PMC* dest) { + SELF.add_int((INTVAL) value, dest); + } + +/* + +=item C<void subtract (PMC* value, PMC* dest)> + +=item C<void subtract_int (INTVAL value, PMC* dest)> + +=item C<void subtract_float (FLOATVAL value, PMC* dest)> + +Subtracts C<value> from the ending offset of the match range, placing +the result in C<dest>. + +=cut + +*/ + + void subtract (PMC* value, PMC* dest) { + INTVAL v = VTABLE_get_integer(INTERP, dest); + SELF.subtract_int(v, dest); + } + + void subtract_int (INTVAL value, PMC* dest) { + VTABLE_morph(INTERP, dest, enum_class_MatchRange); + RANGE_END(dest) = RANGE_END(SELF) - value; + if (RANGE_END(dest) <= -2 || RANGE_END(dest) < RANGE_START(dest) - 1) + RANGE_END(dest) = -2; + } + + void subtract_float (FLOATVAL value, PMC* dest) { + SELF.subtract_int((INTVAL) value, dest); + } + +/* + +=item C<INTVAL is_equal (PMC* value)> + +Compares the matchrange number with C<value> and returs true if they +are equal. + +=cut + +*/ + + INTVAL is_equal (PMC* value) { + if(value->vtable->base_type == enum_class_MatchRange) + return (INTVAL)( + RANGE_START(SELF) == RANGE_START(value) && + RANGE_END(SELF) == RANGE_END(value) + ); + return (INTVAL)0; + } +} + +/* + +=back + +=cut + +*/ + +/* + * Local variables: + * c-indentation-style: bsd + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * + * vim: expandtab shiftwidth=4: +*/