Changeset: 4ad4318de13e for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/4ad4318de13e Added Files: sql/scripts/90_strimps.sql Modified Files: gdk/gdk_strimps.c monetdb5/modules/mal/batExtensions.c sql/backends/monet5/CMakeLists.txt sql/scripts/CMakeLists.txt Branch: string_imprints Log Message:
Initial implementation of the strimp filter diffs (145 lines): diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c --- a/gdk/gdk_strimps.c +++ b/gdk/gdk_strimps.c @@ -455,6 +455,7 @@ STRMPfilter(BAT *b, char *q) BUN i; uint64_t qbmask; uint64_t *ptr; + int zz = 0; if (b->tstrimps == NULL) @@ -471,15 +472,19 @@ STRMPfilter(BAT *b, char *q) qbmask = STRMPmakebitstring(q, b->tstrimps); ptr = (uint64_t *)b->tstrimps->strimps_base; - for (i = 0; i < b->batCount; i++) { - if ((*ptr & qbmask) == qbmask) { + if ((*(ptr + i) & qbmask) == qbmask) { oid pos = i; if (BUNappend(r, &pos, false) != GDK_SUCCEED) goto sfilter_fail; } + else { + zz++; + } } + printf("filtered out: %d entries\n", zz); + r->tkey = true; return virtualize(r); diff --git a/monetdb5/modules/mal/batExtensions.c b/monetdb5/modules/mal/batExtensions.c --- a/monetdb5/modules/mal/batExtensions.c +++ b/monetdb5/modules/mal/batExtensions.c @@ -340,7 +340,7 @@ PATstrimp_makehist(Client cntxt, MalBlkP } #endif static str -PATstrimp(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +PATstrimpCreate(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { bat bid; BAT *b; @@ -358,6 +358,50 @@ PATstrimp(Client cntxt, MalBlkPtr mb, Ma return MAL_SUCCEED; } +static str +PATstrimpFilter(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { + (void)cntxt; + (void)mb; + (void)stk; + (void)pci; + throw(MAL, "bat.strimpfilter", SQLSTATE(HY002) "UNIMPLEMENTED"); +} + +static str +PATstrimpFilterSelect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) +{ + bat bid, sid; + BAT *b, *s, *ob; + str pat; + + (void)cntxt; + (void)mb; + + bid = *getArgReference_bat(stk, pci, 1); + if ((b = BATdescriptor(bid)) == NULL) + throw(MAL, "bat.strimpfilter", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); + + sid = *getArgReference_bat(stk, pci, 2); + if ((s = BATdescriptor(sid)) == NULL) + throw(MAL, "bat.strimpfilter", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); + + assert(s->ttype == TYPE_void); + + if (!STRMPcreate(b)) { + throw(MAL, "bat.strimpfilter", SQLSTATE(HY002) OPERATION_FAILED); + } + + pat = *getArgReference_str(stk, pci, 3); + if ((ob = STRMPfilter(b, pat)) == NULL) { + BBPunfix(b->batCacheid); + throw(MAL, "bat.strimpfilter", SQLSTATE(HY002)); + } + + *getArgReference_bat(stk, pci, 0) = ob->batCacheid; + BBPkeepref(ob->batCacheid); + + return MAL_SUCCEED; +} #include "mel.h" mel_func batExtensions_init_funcs[] = { @@ -392,7 +436,10 @@ mel_func batExtensions_init_funcs[] = { /* String imprints */ // pattern("bat", "strimpNDigrams", PATstrimp_ndigrams, false, "count digrams in a string bat", args(1,2,arg("",lng),batarg("b",str))), // pattern("bat", "strimpHistogram", PATstrimp_makehist, false, "make a histogram of all the byte pairs in a BAT", args(2,3,arg("",lng), batarg("",lng),batarg("b",str))), - pattern("bat", "strimp", PATstrimp, false, "construct the strimp a BAT", args(1,2,arg("",void),batarg("b",str))), + pattern("bat", "mkstrimp", PATstrimpCreate, false, "construct the strimp a BAT", args(1,2,arg("",void),batarg("b",str))), + pattern("bat", "strimpfilter", PATstrimpFilter, false, "", args(1,3,arg("",bit),arg("b",str),arg("q",str))), + pattern("bat", "strimpfilterselect", PATstrimpFilterSelect, false, "", args(1,5,batarg("",oid),batarg("b",str),batarg("s",oid),arg("q",str),arg("a",bit))), + pattern("bat", "strimpfilterjoin", PATstrimpFilter, false, "", args(2,8,batarg("",oid),batarg("b",str),arg("q",str))), { .imp=NULL } }; #include "mal_import.h" diff --git a/sql/backends/monet5/CMakeLists.txt b/sql/backends/monet5/CMakeLists.txt --- a/sql/backends/monet5/CMakeLists.txt +++ b/sql/backends/monet5/CMakeLists.txt @@ -40,7 +40,8 @@ set(include_sql_files 75_storagemodel 76_dump 80_statistics - 81_tracer) + 81_tracer + 90_strimps) if(HAVE_HGE) list(APPEND include_sql_files diff --git a/sql/scripts/90_strimps.sql b/sql/scripts/90_strimps.sql new file mode 100644 --- /dev/null +++ b/sql/scripts/90_strimps.sql @@ -0,0 +1,8 @@ +create schema strimps; + +-- create procedure strimps.strmpcreate(b string) +-- external name bat.strimpCreate; +-- grant execute on procedure strimps.strmpcreate to public; + +CREATE FILTER FUNCTION strimps.filter(strs STRING, q STRING) EXTERNAL NAME bat.strimpfilter; +GRANT EXECUTE ON FILTER FUNCTION strimps.filter TO PUBLIC; diff --git a/sql/scripts/CMakeLists.txt b/sql/scripts/CMakeLists.txt --- a/sql/scripts/CMakeLists.txt +++ b/sql/scripts/CMakeLists.txt @@ -35,6 +35,7 @@ install(FILES 75_storagemodel.sql 80_statistics.sql 81_tracer.sql + 90_strimps.sql 99_system.sql DESTINATION ${CMAKE_INSTALL_LIBDIR}/monetdb5/createdb) _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list