Patch removed because we already have this functionality.

---------------------------------------------------------------------------

a_ogawa00 wrote:
> 
> This patch provides a new function regexp_replace.
> regexp_replace extends a replace function and enables text search
> by the regular expression. And, a back reference can be used within
> a replace string.
> (This patch for PostgreSQL 7.4.3)
> 
> Function: regexp_replace(str, pattern, replace_str)
> Retuen Type: text
> Description: Replace all matched string in str.
>              pattern is regular expression pattern.
>              replace_str is replace string that can use '\1' - '\9', and
> '\&'.
>              '\1' - '\9' is back reference to the n'th subexpression.
>              '\&' is matched string.
> 
> (example1)
> select regexp_replace('ABC-DEF', '(\\w+)-(\\w+)', '\\2-\\1')
> result: DEF-ABC
> 
> (example2)
> update tab1 set col1 = regexp_replace(col1, '[A-Z]', '');
> 
> ---
> Atsushi Ogawa
> [EMAIL PROTECTED]
> 
> --- cut here ---
> 
> *** ./src/backend/regex/regexec.c.orig        Tue Jul 20 08:45:39 2004
> --- ./src/backend/regex/regexec.c     Tue Jul 20 08:49:36 2004
> ***************
> *** 110,115 ****
> --- 110,116 ----
>       regmatch_t *pmatch;
>       rm_detail_t *details;
>       chr                *start;                      /* start of string */
> +     chr                *search_start;       /* search start of string */
>       chr                *stop;                       /* just past end of 
> string */
>       int                     err;                    /* error code if any (0 
> none) */
>       regoff_t   *mem;                        /* memory vector for 
> backtracking */
> ***************
> *** 168,173 ****
> --- 169,175 ----
>   pg_regexec(regex_t *re,
>                  const chr *string,
>                  size_t len,
> +                size_t search_start,
>                  rm_detail_t *details,
>                  size_t nmatch,
>                  regmatch_t pmatch[],
> ***************
> *** 219,224 ****
> --- 221,227 ----
>               v->pmatch = pmatch;
>       v->details = details;
>       v->start = (chr *) string;
> +     v->search_start = (chr *) string + search_start;
>       v->stop = (chr *) string + len;
>       v->err = 0;
>       if (backref)
> ***************
> *** 288,294 ****
>       NOERR();
>       MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
>       cold = NULL;
> !     close = shortest(v, s, v->start, v->start, v->stop, &cold, (int *)
> NULL);
>       freedfa(s);
>       NOERR();
>       if (v->g->cflags & REG_EXPECT)
> --- 291,298 ----
>       NOERR();
>       MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
>       cold = NULL;
> !     close = shortest(v, s, v->search_start, v->search_start, v->stop,
> !                                      &cold, (int *) NULL);
>       freedfa(s);
>       NOERR();
>       if (v->g->cflags & REG_EXPECT)
> ***************
> *** 415,421 ****
> 
>       assert(d != NULL && s != NULL);
>       cold = NULL;
> !     close = v->start;
>       do
>       {
>               MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
> --- 419,425 ----
> 
>       assert(d != NULL && s != NULL);
>       cold = NULL;
> !     close = v->search_start;
>       do
>       {
>               MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
> *** ./src/backend/utils/adt/regexp.c.orig     Tue Jul 20 08:50:08 2004
> --- ./src/backend/utils/adt/regexp.c  Tue Jul 20 09:00:05 2004
> ***************
> *** 80,116 ****
> 
> 
>   /*
> !  * RE_compile_and_execute - compile and execute a RE, caching if possible
>    *
> !  * Returns TRUE on match, FALSE on no match
>    *
> !  *  text_re --- the pattern, expressed as an *untoasted* TEXT object
> !  *  dat --- the data to match against (need not be null-terminated)
> !  *  dat_len --- the length of the data string
> !  *  cflags --- compile options for the pattern
> !  *  nmatch, pmatch  --- optional return area for match details
>    *
> !  * Both pattern and data are given in the database encoding.  We
> internally
> !  * convert to array of pg_wchar which is what Spencer's regex package
> wants.
>    */
> ! static bool
> ! RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
> !                                        int cflags, int nmatch, regmatch_t 
> *pmatch)
>   {
>       int                     text_re_len = VARSIZE(text_re);
> -     pg_wchar   *data;
> -     size_t          data_len;
>       pg_wchar   *pattern;
>       size_t          pattern_len;
>       int                     i;
>       int                     regcomp_result;
> -     int                     regexec_result;
>       cached_re_str re_temp;
> 
> -     /* Convert data string to wide characters */
> -     data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
> -     data_len = pg_mb2wchar_with_len(dat, data, dat_len);
> -
>       /*
>        * Look for a match among previously compiled REs.      Since the data
>        * structure is self-organizing with most-used entries at the front,
> --- 80,105 ----
> 
> 
>   /*
> !  * RE_compile_and_cache - compile a RE, caching if possible
>    *
> !  * Returns regex_t
>    *
> !  *  text_re --- the pattern, expressed as an *untoasted* TEXT object
> !  *  cflags --- compile options for the pattern
>    *
> !  * Pattern is given in the database encoding.  We internally convert to
> !  * array of pg_wchar which is what Spencer's regex package wants.
>    */
> ! static regex_t
> ! RE_compile_and_cache(text *text_re, int cflags)
>   {
>       int                     text_re_len = VARSIZE(text_re);
>       pg_wchar   *pattern;
>       size_t          pattern_len;
>       int                     i;
>       int                     regcomp_result;
>       cached_re_str re_temp;
> 
>       /*
>        * Look for a match among previously compiled REs.      Since the data
>        * structure is self-organizing with most-used entries at the front,
> ***************
> *** 132,149 ****
>                               re_array[0] = re_temp;
>                       }
> 
> !                     /* Perform RE match and return result */
> !                     regexec_result = pg_regexec(&re_array[0].cre_re,
> !                                                                             
> data,
> !                                                                             
> data_len,
> !                                                                             
> NULL,   /* no details */
> !                                                                             
> nmatch,
> !                                                                             
> pmatch,
> !                                                                             
> 0);
> !
> !                     pfree(data);
> !
> !                     return (regexec_result == 0);
>               }
>       }
> 
> --- 121,127 ----
>                               re_array[0] = re_temp;
>                       }
> 
> !                     return re_array[0].cre_re;
>               }
>       }
> 
> ***************
> *** 210,219 ****
> --- 188,231 ----
>       re_array[0] = re_temp;
>       num_res++;
> 
> +     return re_array[0].cre_re;
> + }
> +
> + /*
> +  * RE_compile_and_execute - compile and execute a RE, caching if possible
> +  *
> +  * Returns TRUE on match, FALSE on no match
> +  *
> +  *  text_re --- the pattern, expressed as an *untoasted* TEXT object
> +  *  dat --- the data to match against (need not be null-terminated)
> +  *  dat_len --- the length of the data string
> +  *  cflags --- compile options for the pattern
> +  *  nmatch, pmatch  --- optional return area for match details
> +  *
> +  * Both pattern and data are given in the database encoding.  We
> internally
> +  * convert to array of pg_wchar which is what Spencer's regex package
> wants.
> +  */
> + static bool
> + RE_compile_and_execute(text *text_re, unsigned char *dat, int dat_len,
> +                                        int cflags, int nmatch, regmatch_t 
> *pmatch)
> + {
> +     pg_wchar   *data;
> +     size_t          data_len;
> +     int                     regexec_result;
> +     regex_t         re;
> +
> +     /* Convert data string to wide characters */
> +     data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
> +     data_len = pg_mb2wchar_with_len(dat, data, dat_len);
> +
> +     /* Compile RE */
> +     re = RE_compile_and_cache(text_re, cflags);
> +
>       /* Perform RE match and return result */
>       regexec_result = pg_regexec(&re_array[0].cre_re,
>                                                               data,
>                                                               data_len,
> +                                                             0,
>                                                               NULL,   /* no 
> details */
>                                                               nmatch,
>                                                               pmatch,
> ***************
> *** 415,420 ****
> --- 427,452 ----
>       }
> 
>       PG_RETURN_NULL();
> + }
> +
> + /*
> +  * textregexreplace()
> +  *      Return a replace string matched by a regular expression.
> +  */
> + Datum
> + textregexreplace(PG_FUNCTION_ARGS)
> + {
> +     text       *s = PG_GETARG_TEXT_P(0);
> +     text       *p = PG_GETARG_TEXT_P(1);
> +     text       *r = PG_GETARG_TEXT_P(2);
> +     regex_t         re;
> +
> +     re = RE_compile_and_cache(p, regex_flavor);
> +
> +     return (DirectFunctionCall3(replace_text_regexp,
> +                                                             
> PointerGetDatum(s),
> +                                                             
> PointerGetDatum(&re),
> +                                                             
> PointerGetDatum(r)));
>   }
> 
>   /* similar_escape()
> *** ./src/backend/utils/adt/varlena.c.orig    Tue Jul 20 09:00:17 2004
> --- ./src/backend/utils/adt/varlena.c Tue Jul 20 10:23:32 2004
> ***************
> *** 28,33 ****
> --- 28,34 ----
>   #include "utils/builtins.h"
>   #include "utils/lsyscache.h"
>   #include "utils/pg_locale.h"
> + #include "regex/regex.h"
> 
> 
>   typedef struct varlena unknown;
> ***************
> *** 1971,1976 ****
> --- 1972,2122 ----
>       ret_text = PG_STR_GET_TEXT(str->data);
>       pfree(str->data);
>       pfree(str);
> +
> +     PG_RETURN_TEXT_P(ret_text);
> + }
> +
> + /*
> +  * have_escape_in_regexp_replace_str
> +  * check replace string have escape char
> +  */
> + static bool
> + have_escape_in_regexp_replace_str(const char *replace_str)
> + {
> +     return (strchr(replace_str, '\\') != NULL);
> + }
> +
> + #define REGEXP_REPLACE_BACKREF_CNT          10
> + /*
> +  * appendStringInfoRegexpSubstr
> +  * append string for regexp back references.
> +  */
> + static void
> + appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
> +     regmatch_t *pmatch, text *buf_text, int search_start)
> + {
> +     const char *pstart = PG_TEXT_GET_STR(replace_text);
> +     const char *p = pstart;
> +     const char *pnext;
> +
> +     text       *add_text;
> +     int                     so;
> +     int                     eo;
> +
> +     for(;;) {
> +             pnext = strchr(p, '\\');
> +             if(pnext == NULL) break;
> +
> +             add_text = text_substring(PointerGetDatum(replace_text),
> +                                                               p - pstart + 
> 1, pnext - p, false);
> +             appendStringInfoString(str, PG_TEXT_GET_STR(add_text));
> +             pfree(add_text);
> +
> +             p = pnext + 1;
> +             so = eo = -1;
> +
> +             if(*p >= '1' && *p <= '9') {
> +                     int             idx = *p - '0';
> +                     so = pmatch[idx].rm_so - search_start;
> +                     eo = pmatch[idx].rm_eo - search_start;
> +                     p++;
> +             } else {
> +                     switch(*p) {
> +                     case '&':
> +                             so = pmatch[0].rm_so - search_start;
> +                             eo = pmatch[0].rm_eo - search_start;
> +                             p++;
> +                             break;
> +                     }
> +             }
> +
> +             if(so != -1 && eo != -1) {
> +                     add_text = text_substring(PointerGetDatum(buf_text),
> +                                                                       so + 
> 1, (eo - so), false);
> +                     appendStringInfoString(str, PG_TEXT_GET_STR(add_text));
> +                     pfree(add_text);
> +             }
> +     }
> +
> +     add_text = text_substring(PointerGetDatum(replace_text),
> +                                                       p - pstart + 1, -1, 
> true);
> +     appendStringInfoString(str, PG_TEXT_GET_STR(add_text));
> +     pfree(add_text);
> + }
> +
> + /*
> +  * replace_text_regexp
> +  * replace text using regexp
> + */
> + Datum
> + replace_text_regexp(PG_FUNCTION_ARGS)
> + {
> +     text       *left_text;
> +     text       *right_text;
> +     text       *buf_text;
> +     text       *ret_text;
> +     text       *src_text = PG_GETARG_TEXT_P(0);
> +     char       *src_text_str = PG_TEXT_GET_STR(src_text);
> +     int                     src_text_len = TEXTLEN(src_text);
> +     regex_t    *re = (regex_t *)PG_GETARG_POINTER(1);
> +     text       *replace_text = PG_GETARG_TEXT_P(2);
> +     char       *replace_str = PG_TEXT_GET_STR(replace_text);
> +     StringInfo      str = makeStringInfo();
> +     int                     regexec_result;
> +     regmatch_t      pmatch[REGEXP_REPLACE_BACKREF_CNT];
> +     pg_wchar   *data;
> +     size_t          data_len;
> +     int                     search_start;
> +     bool            have_escape;
> +
> +     buf_text = TEXTDUP(src_text);
> +
> +     /* Convert data string to wide characters */
> +     data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
> +     data_len = pg_mb2wchar_with_len(src_text_str, data,
> strlen(src_text_str));
> +
> +     have_escape = have_escape_in_regexp_replace_str(replace_str);
> +
> +     for(search_start = 0; search_start <= data_len;) {
> +             regexec_result = pg_regexec(re,
> +                                                                     data,
> +                                                                     
> data_len,
> +                                                                     
> search_start,
> +                                                                     NULL,   
> /* no details */
> +                                                                     
> REGEXP_REPLACE_BACKREF_CNT,
> +                                                                     pmatch,
> +                                                                     0);
> +             if(regexec_result != 0) break;
> +
> +             left_text = text_substring(PointerGetDatum(buf_text),
> +                                                                1, 
> pmatch[0].rm_so - search_start, false);
> +             right_text = text_substring(PointerGetDatum(buf_text),
> +                                                                
> pmatch[0].rm_eo - search_start + 1,
> +                                                                -1, true);
> +
> +             appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
> +             if(have_escape) {
> +                     appendStringInfoRegexpSubstr(str, replace_text, pmatch,
> +                                                                             
>  buf_text, search_start);
> +             } else {
> +                     appendStringInfoString(str, replace_str);
> +             }
> +
> +             pfree(buf_text);
> +             pfree(left_text);
> +             buf_text = right_text;
> +
> +             search_start = pmatch[0].rm_eo;
> +             if(pmatch[0].rm_so == pmatch[0].rm_eo) search_start++;
> +     }
> +
> +     appendStringInfoString(str, PG_TEXT_GET_STR(buf_text));
> +     pfree(buf_text);
> +
> +     ret_text = PG_STR_GET_TEXT(str->data);
> +     pfree(str->data);
> +     pfree(str);
> +     pfree(data);
> 
>       PG_RETURN_TEXT_P(ret_text);
>   }
> *** ./src/include/catalog/pg_proc.h.orig      Tue Jul 20 09:24:11 2004
> --- ./src/include/catalog/pg_proc.h   Tue Jul 20 09:26:11 2004
> ***************
> *** 2186,2191 ****
> --- 2186,2193 ----
>   DESCR("return portion of string");
>   DATA(insert OID =  2087 ( replace      PGNSP PGUID 12 f f t f i 3 25 "25 25
> 25"  replace_text - _null_ ));
>   DESCR("replace all occurrences of old_substr with new_substr in string");
> + DATA(insert OID =  2167 ( regexp_replace    PGNSP PGUID 12 f f t f i 3 25
> "25 25 25"  textregexreplace - _null_ ));
> + DESCR("replace text using regexp");
>   DATA(insert OID =  2088 ( split_part   PGNSP PGUID 12 f f t f i 3 25 "25
> 25 23"  split_text - _null_ ));
>   DESCR("split string by field_sep and return field_num");
>   DATA(insert OID =  2089 ( to_hex       PGNSP PGUID 12 f f t f i 1 25 "23"
> to_hex32 - _null_ ));
> *** ./src/include/regex/regex.h.orig  Tue Jul 20 08:51:06 2004
> --- ./src/include/regex/regex.h       Tue Jul 20 08:51:16 2004
> ***************
> *** 163,169 ****
>    * the prototypes for exported functions
>    */
>   extern int  pg_regcomp(regex_t *, const pg_wchar *, size_t, int);
> ! extern int  pg_regexec(regex_t *, const pg_wchar *, size_t, rm_detail_t *,
> size_t, regmatch_t[], int);
>   extern void pg_regfree(regex_t *);
>   extern size_t pg_regerror(int, const regex_t *, char *, size_t);
> 
> --- 163,169 ----
>    * the prototypes for exported functions
>    */
>   extern int  pg_regcomp(regex_t *, const pg_wchar *, size_t, int);
> ! extern int  pg_regexec(regex_t *, const pg_wchar *, size_t, size_t,
> rm_detail_t *, size_t, regmatch_t[], int);
>   extern void pg_regfree(regex_t *);
>   extern size_t pg_regerror(int, const regex_t *, char *, size_t);
> 
> *** ./src/include/utils/builtins.h.orig       Tue Jul 20 09:11:19 2004
> --- ./src/include/utils/builtins.h    Tue Jul 20 09:11:46 2004
> ***************
> *** 408,413 ****
> --- 408,414 ----
>   extern Datum texticregexeq(PG_FUNCTION_ARGS);
>   extern Datum texticregexne(PG_FUNCTION_ARGS);
>   extern Datum textregexsubstr(PG_FUNCTION_ARGS);
> + extern Datum textregexreplace(PG_FUNCTION_ARGS);
>   extern Datum similar_escape(PG_FUNCTION_ARGS);
>   extern const char *assign_regex_flavor(const char *value,
>                                       bool doit, bool interactive);
> ***************
> *** 537,542 ****
> --- 538,544 ----
>   extern bool SplitIdentifierString(char *rawstring, char separator,
>                                         List **namelist);
>   extern Datum replace_text(PG_FUNCTION_ARGS);
> + extern Datum replace_text_regexp(PG_FUNCTION_ARGS);
>   extern Datum split_text(PG_FUNCTION_ARGS);
>   extern Datum text_to_array(PG_FUNCTION_ARGS);
>   extern Datum array_to_text(PG_FUNCTION_ARGS);
> 
> __________________________________________________
> Do You Yahoo!?
> http://bb.yahoo.co.jp/
> 
> 
> ---------------------------(end of broadcast)---------------------------
> TIP 3: if posting/reading through Usenet, please send an appropriate
>       subscribe-nomail command to [EMAIL PROTECTED] so that your
>       message can get through to the mailing list cleanly
> 

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

---------------------------(end of broadcast)---------------------------
TIP 5: Have you checked our extensive FAQ?

               http://www.postgresql.org/docs/faq

Reply via email to