Hi!

I have a problem with UDF.

For acceleration of spell check I needed in UDF function which be able to
replace substrings based on regular expressions.

Such PHP functions are ereg_replace and preg_replace.

Trying to write UDF for MySQL I has confronted with a strange problem.

At compilation in a separate executed file my function works correctly and
gives out correct results.

But when the same function I compile as UDF - I recive interesting effects.

MySQL at performance UDF through times breaks off connection. And when the
connection does not break off - than MySQL gives out wrong results.

I already have despaired to force UDF to work.
I use MySQL-3.23.33 on Slackware Linux with 2.4.2 kernel

This is source:


#ifdef STANDARD
#include <stdio.h>
#include <string.h>
#else
#include <global.h>
#include <my_sys.h>
#endif
#include <mysql.h>
#include <m_ctype.h>
#include <m_string.h>
#include <regex.h>

#define NS 10

#ifdef HAVE_DLOPEN

char* reg_replace(char* string, char* pattern, char* replace, int icase, int
extended);

extern "C" {
my_bool ereg_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message);
char* ereg_replace(UDF_INIT *initid, UDF_ARGS *args, char *result,
        unsigned long *length, char *is_null, char *error);
my_bool eregi_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message);
char* eregi_replace(UDF_INIT *initid, UDF_ARGS *args, char *result,
        unsigned long *length, char *is_null, char *error);
}

my_bool ereg_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message)
{
  if (args->arg_count != 3 ||
   args->arg_type[0] != STRING_RESULT ||
   args->arg_type[1] != STRING_RESULT ||
   args->arg_type[2] != STRING_RESULT
  )
  {
    strcpy(message,"ereg_replace require 3 string arguments");
    return 1;
  }
  return 0;
}

char *ereg_replace(UDF_INIT *initid, UDF_ARGS *args, char *result,
        unsigned long *length, char *is_null, char *error)
{
 if (result = reg_replace(args->args[0], args->args[1], args->args[2], 0,
1)) {
  *length = strlen(result);
  return result;
 } else {
  *is_null = 1;
  return result;
 }
}

my_bool eregi_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message)
{
  if (args->arg_count != 3 ||
   args->arg_type[0] != STRING_RESULT ||
   args->arg_type[1] != STRING_RESULT ||
   args->arg_type[2] != STRING_RESULT
  )
  {
    strcpy(message,"eregi_replace require 3 string arguments");
    return 1;
  }
  return 0;
}

char *eregi_replace(UDF_INIT *initid, UDF_ARGS *args, char *result,
        unsigned long *length, char *is_null, char *error)
{
 if (result = reg_replace(args->args[0], args->args[1], args->args[2], 1,
1)) {
  *length = strlen(result);
  return result;
 } else {
  *is_null = 1;
  return result;
 }
}

char* reg_replace(char* string, char* pattern, char* replace, int icase, int
extended)
{
 char *result;
 regex_t* re;
 regmatch_t* subs;
 char *buf, *nbuf, *walkbuf;
 const char *walk;
 int buf_len;
 int pos, tmp, string_len, new_l;
 int err, copts = 0;

 string_len = strlen(string);

 if (icase) copts = REG_ICASE;
 if (extended) copts |= REG_EXTENDED;

 re = (regex_t*) malloc(sizeof(regex_t));
 subs = (regmatch_t*) malloc(sizeof(regmatch_t)*NS);

 err = regcomp(re, pattern, copts);

 if (err) {
  result = (char*) malloc(strlen(string) + 1);
  memcpy(result, string, strlen(string)+1);
  free(re);
  free(subs);
  return result;
 }

 buf_len = 2 * string_len + 1;
 buf = (char*) malloc(buf_len * sizeof(char));
 if (!buf) {
  regfree(re);
  free(re);
  free(subs);
  return 0;
 }

 err = pos = 0;
 buf[0] = '\0';

 while (!err) {
  err = regexec(re, &string[pos], (size_t) NS, subs, (pos ? REG_NOTBOL :
0));
  if (!err) {
   new_l = strlen(buf) + subs[0].rm_so;
   walk = replace;
   while (*walk)
    if ('\\' == *walk
     && '0' <= walk[1] && '9' >= walk[1]
     && subs[walk[1] - '0'].rm_so > -1
     && subs[walk[1] - '0'].rm_eo > -1) {
      new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
      walk += 2;
    } else {
     new_l++;
     walk++;
    }

   if (new_l + 1 > buf_len) {
    buf_len = 1 + buf_len + 2 * new_l;
    nbuf = (char*) malloc(buf_len);
    memcpy (nbuf, buf, strlen(buf)+1);
    free(buf);
    buf = nbuf;
   }
   tmp = strlen(buf);

   buf[strlen(buf) + subs[0].rm_so + 1] = 0;
   memcpy(buf + strlen(buf), &string[pos], subs[0].rm_so);

   walkbuf = &buf[tmp + subs[0].rm_so];
   walk = replace;
   while (*walk)
    if ('\\' == *walk
     && '0' <= walk[1] && '9' >= walk[1]
     && subs[walk[1] - '0'].rm_so > -1
     && subs[walk[1] - '0'].rm_eo > -1) {
     tmp = subs[walk[1] - '0'].rm_eo
      - subs[walk[1] - '0'].rm_so;
     memcpy (walkbuf,
       &string[pos + subs[walk[1] - '0'].rm_so],
       tmp);
     walkbuf += tmp;
     walk += 2;
    } else
     *walkbuf++ = *walk++;
   *walkbuf = '\0';

   if (subs[0].rm_so == subs[0].rm_eo) {
    if (subs[0].rm_so + pos >= string_len) break;
    new_l = strlen (buf) + 1;
    if (new_l + 1 > buf_len) {
     buf_len = 1 + buf_len + 2 * new_l;
     nbuf = (char*) malloc(buf_len * sizeof(char));
     memcpy (nbuf, buf, strlen(buf)+1);
     free(buf);
     buf = nbuf;
    }
    pos += subs[0].rm_eo + 1;
    buf [new_l-1] = string [pos-1];
    buf [new_l] = '\0';
   } else {
    pos += subs[0].rm_eo;
   }
  } else {
   new_l = strlen(buf) + strlen(&string[pos]);
   if (new_l + 1 > buf_len) {
    buf_len = new_l + 1;
    nbuf = (char*) malloc(buf_len * sizeof(char));
    memcpy (nbuf, buf, strlen(buf)+1);
    free(buf);
    buf = nbuf;
   }
   memcpy(buf + strlen(buf), &string[pos], strlen(&string[pos]) + 1);
  }
 }
 regfree(re);
 free(re);
 free(subs);

 if (strlen(buf) > 0) {
  result = (char*) malloc(strlen(buf) + 1);
  memcpy(result, buf, strlen(buf) + 1);
 }
 return result;
}

#endif /* HAVE_DLOPEN */



---------------------------------------------------------------------
Before posting, please check:
   http://www.mysql.com/manual.php   (the manual)
   http://lists.mysql.com/           (the list archive)

To request this thread, e-mail <[EMAIL PROTECTED]>
To unsubscribe, e-mail <[EMAIL PROTECTED]>
Trouble unsubscribing? Try: http://lists.mysql.com/php/unsubscribe.php

Reply via email to