Antti Salmela <[EMAIL PROTECTED]> writes:
> Index is used incorrectly if constant part of the string ends with \d,

Yeah, you're right --- that code predates our use of the new regexp
engine, and it didn't know that escapes aren't simply quoted characters.

Now that I look at it, it's got a multibyte problem too :-(

If you need a patch right away, here's what I applied to 7.4 branch.

                        regards, tom lane

Index: selfuncs.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v
retrieving revision 1.147.2.3
diff -c -r1.147.2.3 selfuncs.c
*** selfuncs.c  27 Feb 2004 21:44:44 -0000      1.147.2.3
--- selfuncs.c  2 Dec 2004 02:35:48 -0000
***************
*** 3218,3223 ****
--- 3218,3225 ----
        char       *match;
        int                     pos,
                                match_pos,
+                               prev_pos,
+                               prev_match_pos,
                                paren_depth;
        char       *patt;
        char       *rest;
***************
*** 3278,3288 ****
  
        /* OK, allocate space for pattern */
        match = palloc(strlen(patt) + 1);
!       match_pos = 0;
  
        /* note start at pos 1 to skip leading ^ */
!       for (pos = 1; patt[pos]; pos++)
        {
                /*
                 * Check for characters that indicate multiple possible matches
                 * here. XXX I suspect isalpha() is not an adequately
--- 3280,3292 ----
  
        /* OK, allocate space for pattern */
        match = palloc(strlen(patt) + 1);
!       prev_match_pos = match_pos = 0;
  
        /* note start at pos 1 to skip leading ^ */
!       for (prev_pos = pos = 1; patt[pos]; )
        {
+               int             len;
+ 
                /*
                 * Check for characters that indicate multiple possible matches
                 * here. XXX I suspect isalpha() is not an adequately
***************
*** 3297,3302 ****
--- 3301,3314 ----
                        break;
  
                /*
+                * In AREs, backslash followed by alphanumeric is an escape, not
+                * a quoted character.  Must treat it as having multiple 
possible
+                * matches.
+                */
+               if (patt[pos] == '\\' && isalnum((unsigned char) patt[pos + 1]))
+                       break;
+ 
+               /*
                 * Check for quantifiers.  Except for +, this means the 
preceding
                 * character is optional, so we must remove it from the prefix
                 * too!
***************
*** 3305,3318 ****
                        patt[pos] == '?' ||
                        patt[pos] == '{')
                {
!                       if (match_pos > 0)
!                               match_pos--;
!                       pos--;
                        break;
                }
                if (patt[pos] == '+')
                {
!                       pos--;
                        break;
                }
                if (patt[pos] == '\\')
--- 3317,3329 ----
                        patt[pos] == '?' ||
                        patt[pos] == '{')
                {
!                       match_pos = prev_match_pos;
!                       pos = prev_pos;
                        break;
                }
                if (patt[pos] == '+')
                {
!                       pos = prev_pos;
                        break;
                }
                if (patt[pos] == '\\')
***************
*** 3322,3328 ****
                        if (patt[pos] == '\0')
                                break;
                }
!               match[match_pos++] = patt[pos];
        }
  
        match[match_pos] = '\0';
--- 3333,3346 ----
                        if (patt[pos] == '\0')
                                break;
                }
!               /* save position in case we need to back up on next loop cycle 
*/
!               prev_match_pos = match_pos;
!               prev_pos = pos;
!               /* must use encoding-aware processing here */
!               len = pg_mblen(&patt[pos]);
!               memcpy(&match[match_pos], &patt[pos], len);
!               match_pos += len;
!               pos += len;
        }
  
        match[match_pos] = '\0';

---------------------------(end of broadcast)---------------------------
TIP 8: explain analyze is your friend

Reply via email to