Hello! I'm trying to add a new option to ext/misc/regex.c: "\p()", which works like the Lua pattern item "%b":
%b/xy/, where /x/ and /y/ are two distinct characters; such item matches strings that start with /x/, end with /y/, and where the /x/ and /y/ are /balanced/. This means that, if one reads the string from left to right, counting /+1/ for an /x/ and /-1/ for a /y/, the ending /y/ is the first /y/ where the count reaches 0. For instance, the item %b() matches expressions with balanced parentheses.

But I'm having trouble understanding how to register and run this new re_opcode. Below is what I have so far; the code that checks for the balanced characters is already used elsewhere, and it works:

_________regex.c diff
@@ -45,10 +45,11 @@
 **     \W      Non-word character
 **     \d      Digit
 **     \D      Non-digit
 **     \s      Whitespace character
 **     \S      Non-whitespace character
+**     \pxy      Match balanced where xy are the open/close characters characters, like LUA %b
 **
 ** A nondeterministic finite automaton (NFA) is used for matching, so the
 ** performance is bounded by O(N*M) where N is the size of the regular
 ** expression and M is the size of the input string.  The matcher never
 ** exhibits exponential behavior.  Note that the X{p,q} operator expands
@@ -91,10 +92,11 @@ SQLITE_EXTENSION_INIT1
 #define RE_OP_DIGIT      13    /* digit:  [0-9] */
 #define RE_OP_NOTDIGIT   14    /* Not a digit */
 #define RE_OP_SPACE      15    /* space:  [ \t\n\r\v\f] */
 #define RE_OP_NOTSPACE   16    /* Not a digit */
 #define RE_OP_BOUNDARY   17    /* Boundary between word and non-word */
+#define RE_OP_BALANCED   18    /* Balanced between open/close character */
 
 /* Each opcode is a "state" in the NFA */
 typedef unsigned short ReStateNumber;
 
 /* Because this is an NFA and not a DFA, multiple states can be active at
@@ -189,10 +191,29 @@ static int re_digit_char(int c){
 /* Return true if c is a perl "space" character:  [ \t\r\n\v\f] */
 static int re_space_char(int c){
   return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
 }
 
+static int re_balanced(
+    ReInput *p,
+    int cb, /* char that opens a balanced expression */
+    int ce  /* char that closes a balanced expression */
+){
+  int c = re_next_char(p);
+  if(c != cb) return 0; // string doesnt start with open char
+  int cont = 1;
+  while ((c = re_next_char(p)) {
+    if (c == ce) {
+      if (--cont == 0) {
+          return ce;
+      }
+    }
+    else if (c == cb) cont++;
+  }
+  return 0; // string ends out of balance
+}
+
 /* Run a compiled regular expression on the zero-terminated input
 ** string zIn[].  Return true on a match and false if there is no match.
 */
 static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
   ReStateSet aStateSet[2], *pThis, *pNext;
@@ -277,10 +298,14 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
         }
         case RE_OP_BOUNDARY: {
           if( re_word_char(c)!=re_word_char(cPrev) ) re_add_state(pThis, x+1);
           break;
         }
+        case RE_OP_BALANCED: { //////here the re_balanced should be called with ReInput *p with it's two parameters \bxy
+          if( re_balanced(c)!=re_word_char(cPrev) ) re_add_state(pThis, x+1);
+          break;
+        }
         case RE_OP_ANYSTAR: {
           re_add_state(pNext, x);
           re_add_state(pThis, x+1);
           break;
         }
@@ -580,18 +605,29 @@ static const char *re_subcompile_string(ReCompiled *p){
         int specialOp = 0;
         switch( rePeek(p) ){
           case 'b': specialOp = RE_OP_BOUNDARY;   break;
           case 'd': specialOp = RE_OP_DIGIT;      break;
           case 'D': specialOp = RE_OP_NOTDIGIT;   break;
+          case 'p': specialOp = RE_OP_BALANCED;   break;
           case 's': specialOp = RE_OP_SPACE;      break;
           case 'S': specialOp = RE_OP_NOTSPACE;   break;
           case 'w': specialOp = RE_OP_WORD;       break;
           case 'W': specialOp = RE_OP_NOTWORD;    break;
         }
         if( specialOp ){
           p->sIn.i++;
-          re_append(p, specialOp, 0);
+          if(specialOp == RE_OP_BALANCED){  ///here I need to register the op_code and it's two parameters \bxy
+            c = p->xNextChar(&p->sIn);
+            if( c==0 ) return "balanced open character expected";
+            re_append(p, specialOp, c);
+            p->sIn.i++;
+            c = p->xNextChar(&p->sIn);
+            if( c==0 ) return "balanced close character expected";
+            re_append(p, specialOp, c);
+          }else{
+            re_append(p, specialOp, 0);
+          }
         }else{
           c = re_esc_char(p);
           re_append(p, RE_OP_MATCH, c);
         }
         break;
_________

Any help is welcome! In case other people are interested, the code is released under the same license as SQLite. Cheers!