Hi!
I have a problem with UDF.
For acceleration of spell check I needed in UDF function which be able to
replace substrings based on regular expressions.
Such PHP functions are ereg_replace and preg_replace.
Trying to write UDF for MySQL I has confronted with a strange problem.
At compilation in a separate executed file my function works correctly and
gives out correct results.
But when the same function I compile as UDF - I recive interesting effects.
MySQL at performance UDF through times breaks off connection. And when the
connection does not break off - than MySQL gives out wrong results.
I already have despaired to force UDF to work.
I use MySQL-3.23.33 on Slackware Linux with 2.4.2 kernel
This is source:
#ifdef STANDARD
#include <stdio.h>
#include <string.h>
#else
#include <global.h>
#include <my_sys.h>
#endif
#include <mysql.h>
#include <m_ctype.h>
#include <m_string.h>
#include <regex.h>
#define NS 10
#ifdef HAVE_DLOPEN
char* reg_replace(char* string, char* pattern, char* replace, int icase, int
extended);
extern "C" {
my_bool ereg_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message);
char* ereg_replace(UDF_INIT *initid, UDF_ARGS *args, char *result,
unsigned long *length, char *is_null, char *error);
my_bool eregi_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message);
char* eregi_replace(UDF_INIT *initid, UDF_ARGS *args, char *result,
unsigned long *length, char *is_null, char *error);
}
my_bool ereg_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message)
{
if (args->arg_count != 3 ||
args->arg_type[0] != STRING_RESULT ||
args->arg_type[1] != STRING_RESULT ||
args->arg_type[2] != STRING_RESULT
)
{
strcpy(message,"ereg_replace require 3 string arguments");
return 1;
}
return 0;
}
char *ereg_replace(UDF_INIT *initid, UDF_ARGS *args, char *result,
unsigned long *length, char *is_null, char *error)
{
if (result = reg_replace(args->args[0], args->args[1], args->args[2], 0,
1)) {
*length = strlen(result);
return result;
} else {
*is_null = 1;
return result;
}
}
my_bool eregi_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message)
{
if (args->arg_count != 3 ||
args->arg_type[0] != STRING_RESULT ||
args->arg_type[1] != STRING_RESULT ||
args->arg_type[2] != STRING_RESULT
)
{
strcpy(message,"eregi_replace require 3 string arguments");
return 1;
}
return 0;
}
char *eregi_replace(UDF_INIT *initid, UDF_ARGS *args, char *result,
unsigned long *length, char *is_null, char *error)
{
if (result = reg_replace(args->args[0], args->args[1], args->args[2], 1,
1)) {
*length = strlen(result);
return result;
} else {
*is_null = 1;
return result;
}
}
char* reg_replace(char* string, char* pattern, char* replace, int icase, int
extended)
{
char *result;
regex_t* re;
regmatch_t* subs;
char *buf, *nbuf, *walkbuf;
const char *walk;
int buf_len;
int pos, tmp, string_len, new_l;
int err, copts = 0;
string_len = strlen(string);
if (icase) copts = REG_ICASE;
if (extended) copts |= REG_EXTENDED;
re = (regex_t*) malloc(sizeof(regex_t));
subs = (regmatch_t*) malloc(sizeof(regmatch_t)*NS);
err = regcomp(re, pattern, copts);
if (err) {
result = (char*) malloc(strlen(string) + 1);
memcpy(result, string, strlen(string)+1);
free(re);
free(subs);
return result;
}
buf_len = 2 * string_len + 1;
buf = (char*) malloc(buf_len * sizeof(char));
if (!buf) {
regfree(re);
free(re);
free(subs);
return 0;
}
err = pos = 0;
buf[0] = '\0';
while (!err) {
err = regexec(re, &string[pos], (size_t) NS, subs, (pos ? REG_NOTBOL :
0));
if (!err) {
new_l = strlen(buf) + subs[0].rm_so;
walk = replace;
while (*walk)
if ('\\' == *walk
&& '0' <= walk[1] && '9' >= walk[1]
&& subs[walk[1] - '0'].rm_so > -1
&& subs[walk[1] - '0'].rm_eo > -1) {
new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so;
walk += 2;
} else {
new_l++;
walk++;
}
if (new_l + 1 > buf_len) {
buf_len = 1 + buf_len + 2 * new_l;
nbuf = (char*) malloc(buf_len);
memcpy (nbuf, buf, strlen(buf)+1);
free(buf);
buf = nbuf;
}
tmp = strlen(buf);
buf[strlen(buf) + subs[0].rm_so + 1] = 0;
memcpy(buf + strlen(buf), &string[pos], subs[0].rm_so);
walkbuf = &buf[tmp + subs[0].rm_so];
walk = replace;
while (*walk)
if ('\\' == *walk
&& '0' <= walk[1] && '9' >= walk[1]
&& subs[walk[1] - '0'].rm_so > -1
&& subs[walk[1] - '0'].rm_eo > -1) {
tmp = subs[walk[1] - '0'].rm_eo
- subs[walk[1] - '0'].rm_so;
memcpy (walkbuf,
&string[pos + subs[walk[1] - '0'].rm_so],
tmp);
walkbuf += tmp;
walk += 2;
} else
*walkbuf++ = *walk++;
*walkbuf = '\0';
if (subs[0].rm_so == subs[0].rm_eo) {
if (subs[0].rm_so + pos >= string_len) break;
new_l = strlen (buf) + 1;
if (new_l + 1 > buf_len) {
buf_len = 1 + buf_len + 2 * new_l;
nbuf = (char*) malloc(buf_len * sizeof(char));
memcpy (nbuf, buf, strlen(buf)+1);
free(buf);
buf = nbuf;
}
pos += subs[0].rm_eo + 1;
buf [new_l-1] = string [pos-1];
buf [new_l] = '\0';
} else {
pos += subs[0].rm_eo;
}
} else {
new_l = strlen(buf) + strlen(&string[pos]);
if (new_l + 1 > buf_len) {
buf_len = new_l + 1;
nbuf = (char*) malloc(buf_len * sizeof(char));
memcpy (nbuf, buf, strlen(buf)+1);
free(buf);
buf = nbuf;
}
memcpy(buf + strlen(buf), &string[pos], strlen(&string[pos]) + 1);
}
}
regfree(re);
free(re);
free(subs);
if (strlen(buf) > 0) {
result = (char*) malloc(strlen(buf) + 1);
memcpy(result, buf, strlen(buf) + 1);
}
return result;
}
#endif /* HAVE_DLOPEN */
---------------------------------------------------------------------
Before posting, please check:
http://www.mysql.com/manual.php (the manual)
http://lists.mysql.com/ (the list archive)
To request this thread, e-mail <[EMAIL PROTECTED]>
To unsubscribe, e-mail <[EMAIL PROTECTED]>
Trouble unsubscribing? Try: http://lists.mysql.com/php/unsubscribe.php