After looking at various methods, I decided upon the one MySQL uses, which it the RLIKE kerword.
Here is a snapshot of todays CVS with support for regex searching using RLIKE.
http://www.wasp.net.au/~brad/sqlite-110104-snapshot-bkc1.tgz
I have found that putting a * at the start of the regex will cause it to segfault. This appears to be a bug in my glibc regex library, but as it's not really a regex anyway it's probably not an issue yet. I will keep looking at it though.
You can compile the regex support out with --disable-regex if you wish.
The configure script does a cursory check to see that regexec works and if it does it includes the regex support.
It passes make fulltest here anyway and works with all the database files I have.
It is not as optimal as it could be yet as it compiles the regex on every call to sqliteRlikeCompare, but depending on how it profiles (When I get profiling working) I plan to implement some form of compile regex caching to try and speed things up a bit if it's required.
I have included the preliminary patch here for comment.
Regards, Brad
Index: configure.ac =================================================================== RCS file: /sqlite/sqlite/configure.ac,v retrieving revision 1.7 diff -u -r1.7 configure.ac --- configure.ac 19 Dec 2003 20:09:54 -0000 1.7 +++ configure.ac 11 Jan 2004 08:01:43 -0000 @@ -540,6 +540,17 @@ AC_CHECK_FUNC(usleep, [TARGET_CFLAGS="$TARGET_CFLAGS -DHAVE_USLEEP=1"])
#########
+# Check if we have regex support and if we want regex support
+#
+AC_ARG_ENABLE(regex,
+[ --enable-regex Include RLIKE regex queries],,enable_regex=yes)
+AC_MSG_CHECKING([whether to support RLIKE regex queries])
+AC_MSG_RESULT($enable_regex);
+if test "$enable_regex" = "yes"; then
+ AC_CHECK_FUNC(regexec, [TARGET_CFLAGS="$TARGET_CFLAGS -DHAVE_REGEX=1"])
+fi
+
+#########
# Generate the output files.
#
AC_OUTPUT([
Index: src/expr.c
===================================================================
RCS file: /sqlite/sqlite/src/expr.c,v
retrieving revision 1.103
diff -u -r1.103 expr.c
--- src/expr.c 6 Jan 2004 01:13:46 -0000 1.103
+++ src/expr.c 11 Jan 2004 08:01:44 -0000
@@ -751,6 +751,11 @@
*pnName = 4;
break;
}
+ case TK_RLIKE: {
+ *pzName = "rlike";
+ *pnName = 5;
+ break;
+ }
case TK_GLOB: {
*pzName = "glob";
*pnName = 4;
@@ -779,6 +784,7 @@
switch( pExpr->op ){
case TK_GLOB:
case TK_LIKE:
+ case TK_RLIKE:
case TK_FUNCTION: {
int n = pExpr->pList ? pExpr->pList->nExpr : 0; /* Number of arguments */
int no_such_func = 0; /* True if no such function exists */
@@ -914,6 +920,7 @@
case TK_BETWEEN:
case TK_GLOB:
case TK_LIKE:
+ case TK_RLIKE:
return SQLITE_SO_NUM;
case TK_STRING: @@ -1144,6 +1151,7 @@ } case TK_GLOB: case TK_LIKE: + case TK_RLIKE: case TK_FUNCTION: { int i; ExprList *pList = pExpr->pList; Index: src/func.c =================================================================== RCS file: /sqlite/sqlite/src/func.c,v retrieving revision 1.35 diff -u -r1.35 func.c --- src/func.c 2 Jan 2004 13:17:49 -0000 1.35 +++ src/func.c 11 Jan 2004 08:01:45 -0000 @@ -223,6 +223,24 @@ (const unsigned char*)argv[1])); }
+#ifdef HAVE_REGEX
+/*
+ * ** Implementation of the rlike() SQL function. This function implements
+ * ** the RLIKE operator. The first argument to the function is the
+ * ** string and the second argument is the pattern. So, the SQL statements:
+ * **
+ * ** A RLIKE B
+ * **
+ * ** is implemented as rike(A,B).
+ * */
+static void rlikeFunc(sqlite_func *context, int arg, const char **argv){
+ if( argv[0]==0 || argv[1]==0 ) return;
+ sqlite_set_result_int(context,
+ sqliteRlikeCompare((const unsigned char*)argv[0],
+ (const unsigned char*)argv[1]));
+}
+#endif
+
/*
** Implementation of the glob() SQL function. This function implements
** the build-in GLOB operator. The first argument to the function is the
@@ -566,6 +584,9 @@
{ "ifnull", 2, SQLITE_ARGS, ifnullFunc },
{ "random", -1, SQLITE_NUMERIC, randomFunc },
{ "like", 2, SQLITE_NUMERIC, likeFunc },
+#ifdef HAVE_REGEX
+ { "rlike", 2, SQLITE_NUMERIC, rlikeFunc },
+#endif
{ "glob", 2, SQLITE_NUMERIC, globFunc },
{ "nullif", 2, SQLITE_ARGS, nullifFunc },
{ "sqlite_version",0,SQLITE_TEXT, versionFunc},
Index: src/parse.y
===================================================================
RCS file: /sqlite/sqlite/src/parse.y,v
retrieving revision 1.105
diff -u -r1.105 parse.y
--- src/parse.y 6 Dec 2003 21:43:56 -0000 1.105
+++ src/parse.y 11 Jan 2004 08:01:46 -0000
@@ -128,7 +128,7 @@
%fallback ID
ABORT AFTER ASC ATTACH BEFORE BEGIN CASCADE CLUSTER CONFLICT
COPY DATABASE DEFERRED DELIMITERS DESC DETACH EACH END EXPLAIN FAIL FOR
- GLOB IGNORE IMMEDIATE INITIALLY INSTEAD LIKE MATCH KEY
+ GLOB IGNORE IMMEDIATE INITIALLY INSTEAD LIKE RLIKE MATCH KEY
OF OFFSET PRAGMA RAISE REPLACE RESTRICT ROW STATEMENT
TEMP TRIGGER VACUUM VIEW.
@@ -508,7 +508,7 @@ %left OR. %left AND. %right NOT. -%left EQ NE ISNULL NOTNULL IS LIKE GLOB BETWEEN IN. +%left EQ NE ISNULL NOTNULL IS LIKE RLIKE GLOB BETWEEN IN. %left GT GE LT LE. %left BITAND BITOR LSHIFT RSHIFT. %left PLUS MINUS. @@ -580,6 +580,7 @@ %type likeop {int} likeop(A) ::= LIKE. {A = TK_LIKE;} likeop(A) ::= GLOB. {A = TK_GLOB;} +likeop(A) ::= RLIKE. {A = TK_RLIKE;} expr(A) ::= expr(X) PLUS expr(Y). {A = sqliteExpr(TK_PLUS, X, Y, 0);} expr(A) ::= expr(X) MINUS expr(Y). {A = sqliteExpr(TK_MINUS, X, Y, 0);} expr(A) ::= expr(X) STAR expr(Y). {A = sqliteExpr(TK_STAR, X, Y, 0);} Index: src/tokenize.c =================================================================== RCS file: /sqlite/sqlite/src/tokenize.c,v retrieving revision 1.67 diff -u -r1.67 tokenize.c --- src/tokenize.c 8 Jan 2004 02:17:33 -0000 1.67 +++ src/tokenize.c 11 Jan 2004 08:01:46 -0000 @@ -118,6 +118,9 @@ { "REPLACE", TK_REPLACE, }, { "RESTRICT", TK_RESTRICT, }, { "RIGHT", TK_JOIN_KW, }, +#ifdef HAVE_REGEX + { "RLIKE", TK_RLIKE, }, +#endif { "ROLLBACK", TK_ROLLBACK, }, { "ROW", TK_ROW, }, { "SELECT", TK_SELECT, }, Index: src/util.c =================================================================== RCS file: /sqlite/sqlite/src/util.c,v retrieving revision 1.72 diff -u -r1.72 util.c --- src/util.c 7 Jan 2004 03:41:04 -0000 1.72 +++ src/util.c 11 Jan 2004 08:01:48 -0000 @@ -19,6 +19,9 @@ #include "sqliteInt.h" #include <stdarg.h> #include <ctype.h> +#ifdef HAVE_REGEX +#include <regex.h> +#endif
/*
** If malloc() ever fails, this global variable gets set to 1.
@@ -1177,6 +1180,31 @@
}
return *zString==0;
}
+
+#ifdef HAVE_REGEX
+/*
+** Implement a regex based compare between a string and a pattern
+** This implements a MySQL compatible RLIKE operator
+*/
+
+int
+sqliteRlikeCompare(const unsigned char *zPattern, const unsigned char *zString){
+ regex_t regex;
+ int res;
+ if ( zPattern==NULL || zString==NULL ) {
+ printf("One of the args was null!\n");
+ return 0;
+ }
+ res = regcomp(®ex, zPattern, REG_EXTENDED);
+ if ( res != 0 ) {
+ printf("Regcomp failed with code &u on string %s\n",res,zPattern);
+ return 0;
+ }
+ res = (regexec(®ex, zString, 0, NULL, 0)==0);
+ regfree(®ex);
+ return res;
+}
+#endif
/* ** Change the sqlite.magic from SQLITE_MAGIC_OPEN to SQLITE_MAGIC_BUSY.
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]