Changeset: 380efa80f753 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=380efa80f753
Added Files:
        monetdb5/modules/mal/pcre_pub.h
        monetdb5/modules/weldudfs/Makefile.ag
        monetdb5/modules/weldudfs/weld_udfs.c
Modified Files:
        monetdb5/modules/Makefile.ag
        monetdb5/modules/mal/Makefile.ag
        monetdb5/modules/mal/pcre.c
        monetdb5/tools/Makefile.ag
        sql/backends/monet5/rel_weld.c
Branch: rel-weld
Log Message:

weld impl for likeselect with re_match_no_ignore udf


diffs (truncated from 349 to 300 lines):

diff --git a/monetdb5/modules/Makefile.ag b/monetdb5/modules/Makefile.ag
--- a/monetdb5/modules/Makefile.ag
+++ b/monetdb5/modules/Makefile.ag
@@ -4,4 +4,4 @@
 #
 # Copyright 1997 - July 2008 CWI, August 2008 - 2018 MonetDB B.V.
 
-SUBDIRS = atoms kernel mal 
+SUBDIRS = atoms kernel mal weldudfs
diff --git a/monetdb5/modules/mal/Makefile.ag b/monetdb5/modules/mal/Makefile.ag
--- a/monetdb5/modules/mal/Makefile.ag
+++ b/monetdb5/modules/mal/Makefile.ag
@@ -38,7 +38,7 @@ lib_mal = {
                manifold.c manifold.h \
                oltp.c oltp.h \
                wlc.c wlc.h \
-               pcre.c \
+               pcre.c pcre_pub.h \
                profiler.c profiler.h \
                querylog.c querylog.h \
                remote.c remote.h \
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -23,6 +23,7 @@
 
 #include "mal.h"
 #include "mal_exception.h"
+#include "pcre_pub.h"
 
 #ifdef HAVE_LIBPCRE
 #include <pcre.h>
@@ -79,15 +80,6 @@ mal_export str LIKEjoin1(bat *r1, bat *r
 mal_export str ILIKEjoin(bat *r1, bat *r2, const bat *lid, const bat *rid, 
const str *esc, const bat *slid, const bat *srid, const bit *nil_matches, const 
lng *estimate);
 mal_export str ILIKEjoin1(bat *r1, bat *r2, const bat *lid, const bat *rid, 
const bat *slid, const bat *srid, const bit *nil_matches, const lng *estimate);
 
-/* current implementation assumes simple %keyword% [keyw%]* */
-typedef struct RE {
-       char *k;
-       int search;
-       int skip;
-       int len;
-       struct RE *n;
-} RE;
-
 #ifndef HAVE_STRCASESTR
 static const char *
 strcasestr(const char *haystack, const char *needle)
@@ -113,7 +105,7 @@ strcasestr(const char *haystack, const c
 }
 #endif
 
-static int
+int
 re_simple(const char *pat)
 {
        int nr = 0;
@@ -156,7 +148,7 @@ re_match_ignore(const char *s, RE *patte
        return 1;
 }
 
-static int
+int
 re_match_no_ignore(const char *s, RE *pattern)
 {
        RE *r;
@@ -183,7 +175,7 @@ re_destroy(RE *p)
        }
 }
 
-static RE *
+RE *
 re_create(const char *pat, int nr)
 {
        char *x = GDKstrdup(pat);
diff --git a/monetdb5/modules/mal/pcre_pub.h b/monetdb5/modules/mal/pcre_pub.h
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/mal/pcre_pub.h
@@ -0,0 +1,20 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.  If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 1997 - July 2008 CWI, August 2008 - 2018 MonetDB B.V.
+ */
+
+/* current implementation assumes simple %keyword% [keyw%]* */
+typedef struct RE {
+       char *k;
+       int search;
+       int skip;
+       int len;
+       struct RE *n;
+} RE;
+
+int re_simple(const char *pat);
+RE *re_create(const char *pat, int nr);
+int re_match_no_ignore(const char *s, RE *pattern);
diff --git a/monetdb5/modules/weldudfs/Makefile.ag 
b/monetdb5/modules/weldudfs/Makefile.ag
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/weldudfs/Makefile.ag
@@ -0,0 +1,19 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0.  If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Copyright 1997 - July 2008 CWI, August 2008 - 2018 MonetDB B.V.
+
+INCLUDES = ../../mal ../atoms ../kernel ../mal \
+       ../../../common/options \
+       ../../../common/stream \
+       ../../../common/utils \
+       ../../../gdk 
+
+MTSAFE
+
+lib_weldudfs = {
+       NOINST
+       SOURCES = \
+               weld_udfs.c
+}
diff --git a/monetdb5/modules/weldudfs/weld_udfs.c 
b/monetdb5/modules/weldudfs/weld_udfs.c
new file mode 100644
--- /dev/null
+++ b/monetdb5/modules/weldudfs/weld_udfs.c
@@ -0,0 +1,45 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.  If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 1997 - July 2008 CWI, August 2008 - 2018 MonetDB B.V.
+ */
+
+#include "monetdb_config.h"
+#include "gdk.h"
+#include "mal.h"
+#include "pcre_pub.h"
+
+typedef struct { /* vector-of-bytes argument type taken by the UDFs declared below */
+       char *data; /* like() passes this as a const char * to the re_* functions */
+       int64_t len; /* not read by the functions shown here; presumably the length of data -- TODO confirm */
+} i8vec;
+
+MT_Lock initLock MT_LOCK_INITIALIZER("udfs_init");
+
+mal_export void state_init(i8vec *op, int64_t *state_ptr);
+mal_export void like(int64_t *state_ptr, i8vec *col, i8vec *pattern, i8vec 
*exc, int8_t *result);
+
+/*
+ * Allocate the state slot used by like() and store its address, as an
+ * integer, in *state_ptr.
+ *
+ * The slot is zero-initialised so that like() can tell that no RE has
+ * been created yet.  like() reads and writes the slot as an int64_t, so
+ * exactly one int64_t is allocated; the previous calloc(0, ...) returned
+ * either NULL or a zero-sized block that must not be dereferenced.
+ *
+ * op is unused.  If the allocation fails, *state_ptr is set to 0.
+ */
+void state_init(i8vec *op, int64_t *state_ptr) {
+       (void)op;
+       void *ptr = calloc(1, sizeof(int64_t));
+       *state_ptr = (int64_t)(intptr_t)ptr;
+}
+
+/*
+ * Match col->data against pattern->data and store the result of
+ * re_match_no_ignore() in *result.
+ *
+ * *state_ptr holds the address of the slot allocated by state_init().
+ * The slot caches the RE built from the pattern: it is 0 until the first
+ * call creates the RE, and later calls reuse the stored pointer.
+ * exc is unused.
+ */
+void like(int64_t *state_ptr, i8vec *col, i8vec *pattern, i8vec *exc, int8_t *result) {
+       (void)exc;
+       int64_t *adr = (void*)*state_ptr;
+       RE *re = (RE*)(*adr);
+       if (re == NULL) {
+               MT_lock_set(&initLock);
+               /* Re-read the slot now that we hold the lock: another thread
+                * may have stored an RE between the check above and the lock.
+                * Testing the stale local instead would make every waiting
+                * thread build its own RE and overwrite (leak) the previous one. */
+               re = (RE*)(*adr);
+               if (re == NULL) {
+                       /* Create a RE struct and save it in the given mem location */
+                       int nr = re_simple(pattern->data);
+                       re = re_create(pattern->data, nr);
+                       *adr = (int64_t)re;
+               }
+               MT_lock_unset(&initLock);
+       }
+       *result = (int8_t)re_match_no_ignore(col->data, re);
+}
diff --git a/monetdb5/tools/Makefile.ag b/monetdb5/tools/Makefile.ag
--- a/monetdb5/tools/Makefile.ag
+++ b/monetdb5/tools/Makefile.ag
@@ -15,7 +15,7 @@ lib_monetdb5 = {
        DIR = libdir
        SOURCES = libmonetdb5.rc
        LIBS = ../mal/libmal ../modules/atoms/libatoms \
-               ../modules/kernel/libkernel ../modules/mal/libmal \
+               ../modules/kernel/libkernel ../modules/mal/libmal 
../modules/weldudfs/libweldudfs \
                ../optimizer/liboptimizer ../scheduler/libscheduler \
                ../../gdk/libbat \
                HAVE_MAPI?../../clients/mapilib/libmapi \
diff --git a/sql/backends/monet5/rel_weld.c b/sql/backends/monet5/rel_weld.c
--- a/sql/backends/monet5/rel_weld.c
+++ b/sql/backends/monet5/rel_weld.c
@@ -69,7 +69,8 @@ typedef struct {
        str program;
        unsigned long program_len;
        unsigned long program_max_len;
-       char str_cols[STR_BUF_SIZE * 3]; /* global string cols renaming */
+       char global_init[STR_BUF_SIZE * 3]; /* global stmts such as vheap.base 
renaming and udfs inits */
+       char global_cleanup[STR_BUF_SIZE * 3];
        list *stmt_list;
        sql_allocator *sa;
        int error;
@@ -137,16 +138,17 @@ get_weld_cmp(int cmp) {
 
 static str
 get_weld_func(sql_subfunc *f) {
-       if (strcmp(f->func->imp, "+") == 0 || strcmp(f->func->imp, "sum") == 0 
||
-               strcmp(f->func->imp, "count") == 0)
+       str name = f->func->imp ? f->func->imp : f->func->base.name;
+       if (strcmp(name, "+") == 0 || strcmp(name, "sum") == 0 || strcmp(name, 
"count") == 0)
                return "+";
-       else if (strcmp(f->func->imp, "-") == 0)
+       else if (strcmp(name, "-") == 0)
                return "-";
-       else if (strcmp(f->func->imp, "*") == 0 || strcmp(f->func->imp, "prod") 
== 0)
+       else if (strcmp(name, "*") == 0 || strcmp(name, "prod") == 0)
                return "*";
-       else if (strcmp(f->func->imp, "/") == 0)
+       else if (strcmp(name, "/") == 0)
                return "/";
-       /* TODO check for others that we might support through UDFs */
+       else if (strcmp(name, "like") == 0)
+               return "like";
        return NULL;
 }
 
@@ -208,8 +210,20 @@ exp_has_column(sql_exp *exp) {
                ret = exp_has_column(exp->l);
                break;
        case e_cmp:
-               if (exp->l) ret |= exp_has_column(exp->l);
-               if (exp->r) ret |= exp_has_column(exp->r);
+               if (exp->flag == cmp_filter || exp->flag == cmp_or) {
+                       for (en = ((list*)exp->l)->h; en; en = en->next) {
+                               ret |= exp_has_column(en->data);
+                       }
+               } else if (exp->l) {
+                       ret |= exp_has_column(exp->l);
+               }
+               if (exp->flag == cmp_filter || exp->flag == cmp_or || exp->flag 
== cmp_in || exp->flag == cmp_notin) {
+                       for (en = ((list*)exp->r)->h; en; en = en->next) {
+                               ret |= exp_has_column(en->data);
+                       }
+               } else if (exp->r) {
+                       ret |= exp_has_column(exp->r);
+               }
                if (exp->f) ret |= exp_has_column(exp->f);
                break;
        case e_func:
@@ -234,16 +248,45 @@ exp_to_weld(backend *be, weld_state *wst
        }
        switch (exp->type) {
        case e_convert: {
-               wprintf(wstate, "%s(", getWeldType(exp->tpe.type->localtype));
-               exp_to_weld(be, wstate, exp->l);
-               wprintf(wstate, ")");
+               str conv_to = getWeldType(exp->tpe.type->localtype);
+               if (strcmp(conv_to, "vec[i8]") == 0) {
+                       /* Do nothing */
+                       exp_to_weld(be, wstate, exp->l);
+               } else {
+                       wprintf(wstate, "%s(", conv_to);
+                       exp_to_weld(be, wstate, exp->l);
+                       wprintf(wstate, ")");
+               }
                break;
        }
        case e_cmp: {
                if (is_anti(exp)) {
                        wprintf(wstate, "(");
                }
-               if (exp->f) {
+               if (exp->flag == cmp_in || exp->flag == cmp_notin) {
+                       /* TODO implement this */
+                       wstate->error = 1;
+                       return;
+               } else if (get_cmp(exp) == cmp_or) {
+                       wprintf(wstate, "(");
+                       exps_to_weld(be, wstate, exp->l, "");
+                       wprintf(wstate, ") || ( ");
+                       exps_to_weld(be, wstate, exp->r, "");
+                       wprintf(wstate, ")");
+               } else if (get_cmp(exp) == cmp_filter) {
+                       /* Must be an udf */
+                       str udf = get_weld_func(exp->f);
+                       int state_ptr = wstate->next_var++;
+                       sprintf(wstate->global_init + 
strlen(wstate->global_init),
+                                       "let v%d = cudf[state_init, 
i64](\"%s\");", state_ptr, udf);
+                       sprintf(wstate->global_cleanup + 
strlen(wstate->global_cleanup),
+                                       "let v%d = cudf[state_cleanup, 
i64](\"%s\", v%d);", state_ptr, udf, state_ptr);
+                       wprintf(wstate, "cudf[%s, bool](v%d,", udf, state_ptr);
+                       exps_to_weld(be, wstate, exp->l, ", ");
+                       wprintf(wstate, ", ");
+                       exps_to_weld(be, wstate, exp->r, ", ");
+                       wprintf(wstate, ")");
+               } else if (exp->f) {
                        if 
(get_weld_cmp(swap_compare(range2lcompare(exp->flag))) == NULL) {
                                wstate->error = 1;
                                return;
@@ -342,9 +385,9 @@ base_table_produce(backend *be, sql_rel 
                }
                if (exp_subtype(exp)->type->localtype == TYPE_str) {
                        /* Save the vheap and stroffset names */
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to