Changeset: 4ad4318de13e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/4ad4318de13e
Added Files:
        sql/scripts/90_strimps.sql
Modified Files:
        gdk/gdk_strimps.c
        monetdb5/modules/mal/batExtensions.c
        sql/backends/monet5/CMakeLists.txt
        sql/scripts/CMakeLists.txt
Branch: string_imprints
Log Message:

Initial implementation of the strimp filter


diffs (145 lines):

diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
--- a/gdk/gdk_strimps.c
+++ b/gdk/gdk_strimps.c
@@ -455,6 +455,7 @@ STRMPfilter(BAT *b, char *q)
        BUN i;
        uint64_t qbmask;
        uint64_t *ptr;
+       int zz = 0;
 
 
        if (b->tstrimps == NULL)
@@ -471,15 +472,19 @@ STRMPfilter(BAT *b, char *q)
        qbmask = STRMPmakebitstring(q, b->tstrimps);
        ptr = (uint64_t *)b->tstrimps->strimps_base;
 
-
        for (i = 0; i < b->batCount; i++) {
-               if ((*ptr & qbmask) == qbmask) {
+               if ((*(ptr + i) & qbmask) == qbmask) {
                        oid pos = i;
                        if (BUNappend(r, &pos, false) != GDK_SUCCEED)
                                goto sfilter_fail;
                }
+               else {
+                       zz++;
+               }
        }
+       printf("filtered out: %d entries\n", zz);
 
+       r->tkey = true;
        return virtualize(r);
 
 
diff --git a/monetdb5/modules/mal/batExtensions.c b/monetdb5/modules/mal/batExtensions.c
--- a/monetdb5/modules/mal/batExtensions.c
+++ b/monetdb5/modules/mal/batExtensions.c
@@ -340,7 +340,7 @@ PATstrimp_makehist(Client cntxt, MalBlkP
 }
 #endif
 static str
-PATstrimp(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+PATstrimpCreate(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
        bat bid;
        BAT *b;
@@ -358,6 +358,50 @@ PATstrimp(Client cntxt, MalBlkPtr mb, Ma
        return MAL_SUCCEED;
 }
 
+static str
+PATstrimpFilter(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) {
+       (void)cntxt;
+       (void)mb;
+       (void)stk;
+       (void)pci;
+       throw(MAL, "bat.strimpfilter", SQLSTATE(HY002) "UNIMPLEMENTED");
+}
+
+static str
+PATstrimpFilterSelect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       bat bid, sid;
+       BAT *b, *s, *ob;
+       str pat;
+
+       (void)cntxt;
+       (void)mb;
+
+       bid = *getArgReference_bat(stk, pci, 1);
+       if ((b = BATdescriptor(bid)) == NULL)
+               throw(MAL, "bat.strimpfilter", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+
+       sid = *getArgReference_bat(stk, pci, 2);
+       if ((s = BATdescriptor(sid)) == NULL)
+               throw(MAL, "bat.strimpfilter", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING);
+
+       assert(s->ttype == TYPE_void);
+
+       if (!STRMPcreate(b)) {
+               throw(MAL, "bat.strimpfilter", SQLSTATE(HY002) OPERATION_FAILED);
+       }
+
+       pat = *getArgReference_str(stk, pci, 3);
+       if ((ob = STRMPfilter(b, pat)) == NULL) {
+               BBPunfix(b->batCacheid);
+               throw(MAL, "bat.strimpfilter", SQLSTATE(HY002));
+       }
+
+       *getArgReference_bat(stk, pci, 0) = ob->batCacheid;
+       BBPkeepref(ob->batCacheid);
+
+       return MAL_SUCCEED;
+}
 
 #include "mel.h"
 mel_func batExtensions_init_funcs[] = {
@@ -392,7 +436,10 @@ mel_func batExtensions_init_funcs[] = {
  /* String imprints */
 // pattern("bat", "strimpNDigrams", PATstrimp_ndigrams, false, "count digrams in a string bat", args(1,2,arg("",lng),batarg("b",str))),
 // pattern("bat", "strimpHistogram", PATstrimp_makehist, false, "make a histogram of all the byte pairs in a BAT", args(2,3,arg("",lng), batarg("",lng),batarg("b",str))),
- pattern("bat", "strimp", PATstrimp, false, "construct the strimp a BAT", args(1,2,arg("",void),batarg("b",str))),
+ pattern("bat", "mkstrimp", PATstrimpCreate, false, "construct the strimp a BAT", args(1,2,arg("",void),batarg("b",str))),
+ pattern("bat", "strimpfilter", PATstrimpFilter, false, "", args(1,3,arg("",bit),arg("b",str),arg("q",str))),
+ pattern("bat", "strimpfilterselect", PATstrimpFilterSelect, false, "", args(1,5,batarg("",oid),batarg("b",str),batarg("s",oid),arg("q",str),arg("a",bit))),
+ pattern("bat", "strimpfilterjoin", PATstrimpFilter, false, "", args(2,8,batarg("",oid),batarg("b",str),arg("q",str))),
  { .imp=NULL }
 };
 #include "mal_import.h"
diff --git a/sql/backends/monet5/CMakeLists.txt b/sql/backends/monet5/CMakeLists.txt
--- a/sql/backends/monet5/CMakeLists.txt
+++ b/sql/backends/monet5/CMakeLists.txt
@@ -40,7 +40,8 @@ set(include_sql_files
   75_storagemodel
   76_dump
   80_statistics
-  81_tracer)
+  81_tracer
+  90_strimps)
 
 if(HAVE_HGE)
   list(APPEND include_sql_files
diff --git a/sql/scripts/90_strimps.sql b/sql/scripts/90_strimps.sql
new file mode 100644
--- /dev/null
+++ b/sql/scripts/90_strimps.sql
@@ -0,0 +1,8 @@
+create schema strimps;
+
+-- create procedure strimps.strmpcreate(b string)
+-- external name bat.strimpCreate;
+-- grant execute on procedure strimps.strmpcreate to public;
+
+CREATE FILTER FUNCTION strimps.filter(strs STRING, q STRING) EXTERNAL NAME bat.strimpfilter;
+GRANT EXECUTE ON FILTER FUNCTION strimps.filter TO PUBLIC;
diff --git a/sql/scripts/CMakeLists.txt b/sql/scripts/CMakeLists.txt
--- a/sql/scripts/CMakeLists.txt
+++ b/sql/scripts/CMakeLists.txt
@@ -35,6 +35,7 @@ install(FILES
   75_storagemodel.sql
   80_statistics.sql
   81_tracer.sql
+  90_strimps.sql
   99_system.sql
   DESTINATION ${CMAKE_INSTALL_LIBDIR}/monetdb5/createdb)
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to