G'day all,

After looking at various methods, I decided upon the one MySQL uses, which it the RLIKE kerword.

Here is a snapshot of todays CVS with support for regex searching using RLIKE.

http://www.wasp.net.au/~brad/sqlite-110104-snapshot-bkc1.tgz

I have found that putting a * at the start of the regex will cause it to segfault. This appears to be a bug in my glibc regex library, but as it's not really a regex anyway it's probably not an issue yet. I will keep looking at it though.

You can compile the regex support out with --disable-regex if you wish.
The configure script does a cursory check to see that regexec works and if it does it includes the regex support.


It passes make fulltest here anyway and works with all the database files I have.

It is not as optimal as it could be yet as it compiles the regex on every call to sqliteRlikeCompare, but depending on how it profiles (When I get profiling working) I plan to implement some form of compile regex caching to try and speed things up a bit if it's required.

I have included the preliminary patch here for comment.

Regards,
Brad

Index: configure.ac
===================================================================
RCS file: /sqlite/sqlite/configure.ac,v
retrieving revision 1.7
diff -u -r1.7 configure.ac
--- configure.ac        19 Dec 2003 20:09:54 -0000      1.7
+++ configure.ac        11 Jan 2004 08:01:43 -0000
@@ -540,6 +540,17 @@
 AC_CHECK_FUNC(usleep, [TARGET_CFLAGS="$TARGET_CFLAGS -DHAVE_USLEEP=1"])

#########
+# Check if we have regex support and if we want regex support
+#
+AC_ARG_ENABLE(regex,
+[ --enable-regex Include RLIKE regex queries],,enable_regex=yes)
+AC_MSG_CHECKING([whether to support RLIKE regex queries])
+AC_MSG_RESULT($enable_regex);
+if test "$enable_regex" = "yes"; then
+ AC_CHECK_FUNC(regexec, [TARGET_CFLAGS="$TARGET_CFLAGS -DHAVE_REGEX=1"])
+fi
+
+#########
# Generate the output files.
#
AC_OUTPUT([
Index: src/expr.c
===================================================================
RCS file: /sqlite/sqlite/src/expr.c,v
retrieving revision 1.103
diff -u -r1.103 expr.c
--- src/expr.c 6 Jan 2004 01:13:46 -0000 1.103
+++ src/expr.c 11 Jan 2004 08:01:44 -0000
@@ -751,6 +751,11 @@
*pnName = 4;
break;
}
+ case TK_RLIKE: {
+ *pzName = "rlike";
+ *pnName = 5;
+ break;
+ }
case TK_GLOB: {
*pzName = "glob";
*pnName = 4;
@@ -779,6 +784,7 @@
switch( pExpr->op ){
case TK_GLOB:
case TK_LIKE:
+ case TK_RLIKE:
case TK_FUNCTION: {
int n = pExpr->pList ? pExpr->pList->nExpr : 0; /* Number of arguments */
int no_such_func = 0; /* True if no such function exists */
@@ -914,6 +920,7 @@
case TK_BETWEEN:
case TK_GLOB:
case TK_LIKE:
+ case TK_RLIKE:
return SQLITE_SO_NUM;


     case TK_STRING:
@@ -1144,6 +1151,7 @@
     }
     case TK_GLOB:
     case TK_LIKE:
+    case TK_RLIKE:
     case TK_FUNCTION: {
       int i;
       ExprList *pList = pExpr->pList;
Index: src/func.c
===================================================================
RCS file: /sqlite/sqlite/src/func.c,v
retrieving revision 1.35
diff -u -r1.35 func.c
--- src/func.c  2 Jan 2004 13:17:49 -0000       1.35
+++ src/func.c  11 Jan 2004 08:01:45 -0000
@@ -223,6 +223,24 @@
                       (const unsigned char*)argv[1]));
 }

+#ifdef HAVE_REGEX
+/*
+ * ** Implementation of the rlike() SQL function. This function implements
+ * ** the RLIKE operator. The first argument to the function is the
+ * ** string and the second argument is the pattern. So, the SQL statements:
+ * **
+ * ** A RLIKE B
+ * **
+ * ** is implemented as rike(A,B).
+ * */
+static void rlikeFunc(sqlite_func *context, int arg, const char **argv){
+ if( argv[0]==0 || argv[1]==0 ) return;
+ sqlite_set_result_int(context,
+ sqliteRlikeCompare((const unsigned char*)argv[0],
+ (const unsigned char*)argv[1]));
+}
+#endif
+
/*
** Implementation of the glob() SQL function. This function implements
** the build-in GLOB operator. The first argument to the function is the
@@ -566,6 +584,9 @@
{ "ifnull", 2, SQLITE_ARGS, ifnullFunc },
{ "random", -1, SQLITE_NUMERIC, randomFunc },
{ "like", 2, SQLITE_NUMERIC, likeFunc },
+#ifdef HAVE_REGEX
+ { "rlike", 2, SQLITE_NUMERIC, rlikeFunc },
+#endif
{ "glob", 2, SQLITE_NUMERIC, globFunc },
{ "nullif", 2, SQLITE_ARGS, nullifFunc },
{ "sqlite_version",0,SQLITE_TEXT, versionFunc},
Index: src/parse.y
===================================================================
RCS file: /sqlite/sqlite/src/parse.y,v
retrieving revision 1.105
diff -u -r1.105 parse.y
--- src/parse.y 6 Dec 2003 21:43:56 -0000 1.105
+++ src/parse.y 11 Jan 2004 08:01:46 -0000
@@ -128,7 +128,7 @@
%fallback ID
ABORT AFTER ASC ATTACH BEFORE BEGIN CASCADE CLUSTER CONFLICT
COPY DATABASE DEFERRED DELIMITERS DESC DETACH EACH END EXPLAIN FAIL FOR
- GLOB IGNORE IMMEDIATE INITIALLY INSTEAD LIKE MATCH KEY
+ GLOB IGNORE IMMEDIATE INITIALLY INSTEAD LIKE RLIKE MATCH KEY
OF OFFSET PRAGMA RAISE REPLACE RESTRICT ROW STATEMENT
TEMP TRIGGER VACUUM VIEW.


@@ -508,7 +508,7 @@
 %left OR.
 %left AND.
 %right NOT.
-%left EQ NE ISNULL NOTNULL IS LIKE GLOB BETWEEN IN.
+%left EQ NE ISNULL NOTNULL IS LIKE RLIKE GLOB BETWEEN IN.
 %left GT GE LT LE.
 %left BITAND BITOR LSHIFT RSHIFT.
 %left PLUS MINUS.
@@ -580,6 +580,7 @@
 %type likeop {int}
 likeop(A) ::= LIKE. {A = TK_LIKE;}
 likeop(A) ::= GLOB. {A = TK_GLOB;}
+likeop(A) ::= RLIKE. {A = TK_RLIKE;}
 expr(A) ::= expr(X) PLUS expr(Y).  {A = sqliteExpr(TK_PLUS, X, Y, 0);}
 expr(A) ::= expr(X) MINUS expr(Y). {A = sqliteExpr(TK_MINUS, X, Y, 0);}
 expr(A) ::= expr(X) STAR expr(Y).  {A = sqliteExpr(TK_STAR, X, Y, 0);}
Index: src/tokenize.c
===================================================================
RCS file: /sqlite/sqlite/src/tokenize.c,v
retrieving revision 1.67
diff -u -r1.67 tokenize.c
--- src/tokenize.c      8 Jan 2004 02:17:33 -0000       1.67
+++ src/tokenize.c      11 Jan 2004 08:01:46 -0000
@@ -118,6 +118,9 @@
   { "REPLACE",           TK_REPLACE,      },
   { "RESTRICT",          TK_RESTRICT,     },
   { "RIGHT",             TK_JOIN_KW,      },
+#ifdef HAVE_REGEX
+  { "RLIKE",              TK_RLIKE,        },
+#endif
   { "ROLLBACK",          TK_ROLLBACK,     },
   { "ROW",               TK_ROW,          },
   { "SELECT",            TK_SELECT,       },
Index: src/util.c
===================================================================
RCS file: /sqlite/sqlite/src/util.c,v
retrieving revision 1.72
diff -u -r1.72 util.c
--- src/util.c  7 Jan 2004 03:41:04 -0000       1.72
+++ src/util.c  11 Jan 2004 08:01:48 -0000
@@ -19,6 +19,9 @@
 #include "sqliteInt.h"
 #include <stdarg.h>
 #include <ctype.h>
+#ifdef HAVE_REGEX
+#include <regex.h>
+#endif

/*
** If malloc() ever fails, this global variable gets set to 1.
@@ -1177,6 +1180,31 @@
}
return *zString==0;
}
+
+#ifdef HAVE_REGEX
+/*
+** Implement a regex based compare between a string and a pattern
+** This implements a MySQL compatible RLIKE operator
+*/
+
+int
+sqliteRlikeCompare(const unsigned char *zPattern, const unsigned char *zString){
+ regex_t regex;
+ int res;
+ if ( zPattern==NULL || zString==NULL ) {
+ printf("One of the args was null!\n");
+ return 0;
+ }
+ res = regcomp(&regex, zPattern, REG_EXTENDED);
+ if ( res != 0 ) {
+ printf("Regcomp failed with code &u on string %s\n",res,zPattern);
+ return 0;
+ }
+ res = (regexec(&regex, zString, 0, NULL, 0)==0);
+ regfree(&regex);
+ return res;
+}
+#endif


 /*
 ** Change the sqlite.magic from SQLITE_MAGIC_OPEN to SQLITE_MAGIC_BUSY.


--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]



Reply via email to