Hello !  

I'm trying to add a new option to ext/misc/regex.c: "\p()", which works like
the Lua pattern item "%b".  

%b/xy/, where /x/ and /y/ are two distinct characters; such item matches
strings that start with /x/, end with /y/, and where the /x/ and /y/ are
/balanced/. This means that, if one reads the string from left to right,
counting /+1/ for an /x/ and /-1/ for a /y/, the ending /y/ is the first /y/
where the count reaches 0. For instance, the item %b() matches expressions
with balanced parentheses.  

But I'm having trouble understanding how to register and run this new
re_opcode. Below is what I've got so far. The code that checks for the
balanced characters is already used elsewhere, and it works:  

_________regex.c diff  

@@ -45,10 +45,11 @@
?**???? \W????? Non-word character
?**???? \d????? Digit
?**???? \D????? Non-digit
?**???? \s????? Whitespace character
?**???? \S????? Non-whitespace character
+**???? \pxy????? Match balanced where xy are the open/close
characters, like Lua %b 
?**
?** A nondeterministic finite automaton (NFA) is used for matching, so the
?** performance is bounded by O(N*M) where N is the size of the regular
?** expression and M is the size of the input string.? The matcher never
?** exhibits exponential behavior.? Note that the X{p,q} operator expands
@@ -91,10 +92,11 @@ SQLITE_EXTENSION_INIT1
?#define RE_OP_DIGIT????? 13??? /* digit:? [0-9] */
?#define RE_OP_NOTDIGIT?? 14??? /* Not a digit */
?#define RE_OP_SPACE????? 15??? /* space:? [ \t\n\r\v\f] */
?#define RE_OP_NOTSPACE?? 16??? /* Not a digit */
?#define RE_OP_BOUNDARY?? 17??? /* Boundary between word and non-word
*/
+#define RE_OP_BALANCED?? 18??? /* Balanced between open/close character
*/
?
?/* Each opcode is a "state" in the NFA */
?typedef unsigned short ReStateNumber;
?
?/* Because this is an NFA and not a DFA, multiple states can be active at
@@ -189,10 +191,29 @@ static int re_digit_char(int c){
?/* Return true if c is a perl "space" character:? [ \t\r\n\v\f] */
?static int re_space_char(int c){
?? return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f';
?}
?
+static int re_balanced(
+??? ReInput *p,
+??? int cb, /* char that opens a balanced expression */
+??? int ce? /* char that closes a balanced expression */
+){
+? int c = re_next_char(p);
+? if(c != cb) return 0; // string doesnt start with open char
+? int cont = 1;
+? while ((c = re_next_char(p)) {
+??? if (c == ce) {
+????? if (--cont == 0) {
+????????? return ce;
+????? }
+??? }
+??? else if (c == cb) cont++;
+? }
+? return 0; // string ends out of balance
+}
+
?/* Run a compiled regular expression on the zero-terminated input
?** string zIn[].? Return true on a match and false if there is no match.
?*/
?static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
?? ReStateSet aStateSet[2], *pThis, *pNext;
@@ -277,10 +298,14 @@ static int re_match(ReCompiled *pRe, const unsigned
char *zIn, int nIn){
???????? }
???????? case RE_OP_BOUNDARY: {
?????????? if( re_word_char(c)!=re_word_char(cPrev) )
re_add_state(pThis, x+1);
?????????? break;
???????? }
+??????? case RE_OP_BALANCED: { //////here the re_balanced should be
called with ReInput *p with it's two parameters \bxy
+????????? if( re_balanced(c)!=re_word_char(cPrev) )
re_add_state(pThis, x+1);
+????????? break;
+??????? }
???????? case RE_OP_ANYSTAR: {
?????????? re_add_state(pNext, x);
?????????? re_add_state(pThis, x+1);
?????????? break;
???????? }
@@ -580,18 +605,29 @@ static const char *re_subcompile_string(ReCompiled *p){
???????? int specialOp = 0;
???????? switch( rePeek(p) ){
?????????? case 'b': specialOp = RE_OP_BOUNDARY;?? break;
?????????? case 'd': specialOp = RE_OP_DIGIT;????? break;
?????????? case 'D': specialOp = RE_OP_NOTDIGIT;?? break;
+????????? case 'p': specialOp = RE_OP_BALANCED;?? break;
?????????? case 's': specialOp = RE_OP_SPACE;????? break;
?????????? case 'S': specialOp = RE_OP_NOTSPACE;?? break;
?????????? case 'w': specialOp = RE_OP_WORD;?????? break;
?????????? case 'W': specialOp = RE_OP_NOTWORD;??? break;
???????? }
???????? if( specialOp ){
?????????? p->sIn.i++;
-????????? re_append(p, specialOp, 0);
+????????? if(specialOp == RE_OP_BALANCED){? ///here I need to
register the op_code and it's two parameters \bxy
+??????????? c = p->xNextChar(&p->sIn);
+??????????? if( c==0 ) return "balanced open character expected";
+??????????? re_append(p, specialOp, c);
+??????????? p->sIn.i++;
+??????????? c = p->xNextChar(&p->sIn);
+??????????? if( c==0 ) return "balanced close character
expected";
+??????????? re_append(p, specialOp, c);
+????????? }else{
+??????????? re_append(p, specialOp, 0);
+????????? }
???????? }else{
?????????? c = re_esc_char(p);
?????????? re_append(p, RE_OP_MATCH, c);
???????? }
???????? break;  

_________  

?  

Any help is welcome!  

In case other people are interested, the code is released under the same
license as SQLite.  

Cheers !

Reply via email to