Changeset: 139143c85939 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/139143c85939 Modified Files: gdk/gdk_strimps.c gdk/gdk_strimps.h monetdb5/modules/mal/strimps.c sql/backends/monet5/sql_strimps.c Branch: string_imprints Log Message:
strimp creation and filtering should work with candidates This commit changes the interface of the GDK functions and how they are called. diffs (190 lines): diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c --- a/gdk/gdk_strimps.c +++ b/gdk/gdk_strimps.c @@ -238,10 +238,10 @@ STRMPchoosePairs(PairHistogramElem *hist } static bool -STRMPbuildHeader(BAT *b, CharPair *hpairs) { +STRMPbuildHeader(BAT *b, BAT *s, CharPair *hpairs) { lng t0 = 0; BATiter bi; - str s; + str cs; BUN i; size_t hidx; size_t hlen; @@ -249,6 +249,8 @@ STRMPbuildHeader(BAT *b, CharPair *hpair PairIterator pi, *pip; CharPair cp, *cpp; + (void)s; + TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec(); hlen = STRIMP_HISTSIZE; if ((hist = (PairHistogramElem *)GDKmalloc(hlen*sizeof(PairHistogramElem))) == NULL) { @@ -266,9 +268,9 @@ STRMPbuildHeader(BAT *b, CharPair *hpair pip = &pi; cpp = &cp; for (i = 0; i < b->batCount; i++) { - s = (str)BUNtvar(bi, i); - if (!strNil(s)) { - pi.s = s; + cs = (str)BUNtvar(bi, i); + if (!strNil(cs)) { + pi.s = cs; pi.pos = 0; pi.lim = strlen(pi.s); while (pair_at(pip, cpp)) { @@ -324,7 +326,7 @@ STRMPbuildHeader(BAT *b, CharPair *hpair /* Create the heap for a string imprint. Returns NULL on failure. */ static Strimps * -STRMPcreateStrimpHeap(BAT *b) +STRMPcreateStrimpHeap(BAT *b, BAT *s) { uint8_t *h1, *h2; Strimps *r = NULL; @@ -338,7 +340,7 @@ STRMPcreateStrimpHeap(BAT *b) MT_lock_set(&b->batIdxLock); /* Make sure no other thread got here first */ if (b->tstrimps == NULL) { - STRMPbuildHeader(b, hpairs); /* Find the header pairs */ + STRMPbuildHeader(b, s, hpairs); /* Find the header pairs */ sz = 8 + STRIMP_HEADER_SIZE; /* add 8-bytes for the descriptor and the pair sizes */ for (i = 0; i < STRIMP_HEADER_SIZE; i++) { sz += hpairs[i].psize; @@ -459,13 +461,14 @@ BATcheckstrimps(BAT *b) * list. 
*/ BAT * -STRMPfilter(BAT *b, char *q) +STRMPfilter(BAT *b, BAT *s, char *q) { BAT *r = NULL; BUN i; uint64_t qbmask; uint64_t *ptr; Strimps *strmps; + (void)s; if (isVIEW(b)) { // b = BBP_cache(VIEWtparent(b)); @@ -583,12 +586,12 @@ static ATOMIC_TYPE STRMPnthread = ATOMIC /* Create */ gdk_return -STRMPcreate(BAT *b) +STRMPcreate(BAT *b, BAT *s) { lng t0 = 0; BATiter bi; BUN i; - str s; + str cs; Strimps *h; uint64_t *dh; BAT *pb; @@ -611,16 +614,16 @@ STRMPcreate(BAT *b) if (BATcheckstrimps(pb)) return GDK_SUCCEED; - if ((h = STRMPcreateStrimpHeap(pb)) == NULL) { + if ((h = STRMPcreateStrimpHeap(pb, s)) == NULL) { return GDK_FAIL; } dh = (uint64_t *)((uint8_t*)h->strimps.base + h->strimps.free + b->hseqbase*8); bi = bat_iterator(b); for (i = 0; i < bi.count; i++) { - s = (str)BUNtvar(bi, i); - if (!strNil(s)) - *dh++ = STRMPmakebitstring(s, h); + cs = (str)BUNtvar(bi, i); + if (!strNil(cs)) + *dh++ = STRMPmakebitstring(cs, h); else *dh++ = 0; /* no pairs in nil values */ } diff --git a/gdk/gdk_strimps.h b/gdk/gdk_strimps.h --- a/gdk/gdk_strimps.h +++ b/gdk/gdk_strimps.h @@ -42,6 +42,6 @@ typedef struct { // gdk_export gdk_return STRMPmakehistogramBP(BAT *b, uint64_t *hist, size_t hist_size, size_t *nbins); // make static // gdk_export gdk_return STRMP_make_header(StrimpHeader *h, uint64_t *hist, size_t hist_size); // make static // gdk_export gdk_return STRMP_make_header(BAT *b); -gdk_export gdk_return STRMPcreate(BAT *b); -gdk_export BAT *STRMPfilter(BAT *b, char *q); +gdk_export gdk_return STRMPcreate(BAT *b, BAT *s); +gdk_export BAT *STRMPfilter(BAT *b, BAT *s, char *q); #endif /* _GDK_STRIMPS_H_ */ diff --git a/monetdb5/modules/mal/strimps.c b/monetdb5/modules/mal/strimps.c --- a/monetdb5/modules/mal/strimps.c +++ b/monetdb5/modules/mal/strimps.c @@ -79,8 +79,8 @@ PATstrimp_makehist(Client cntxt, MalBlkP static str PATstrimpCreate(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { - bat bid; - BAT *b; + bat bid, sid; + BAT *b, *s; (void)cntxt; 
(void)mb; @@ -88,7 +88,11 @@ PATstrimpCreate(Client cntxt, MalBlkPtr if ((b = BATdescriptor(bid)) == NULL) throw(MAL, "strimps.strimpCreate", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); - if(STRMPcreate(b) != GDK_SUCCEED) + sid = *getArgReference_bat(stk, pci, 2); + if ((s = BATdescriptor(sid)) == NULL) + throw(MAL, "strimps.strimpCreate", SQLSTATE(HY002) RUNTIME_OBJECT_MISSING); + + if(STRMPcreate(b, s) != GDK_SUCCEED) throw(MAL, "strimps.strimpCreate", SQLSTATE(HY002) OPERATION_FAILED); // *getArgReference_lng(stk, pci, 0) = 0; @@ -124,11 +128,11 @@ PATstrimpFilterSelect(Client cntxt, MalB assert(s->ttype == TYPE_void); - if(STRMPcreate(b) != GDK_SUCCEED) + if(STRMPcreate(b, s) != GDK_SUCCEED) throw(MAL, "strimps.strimpfilter", SQLSTATE(HY002) "strimp creation failed"); pat = *getArgReference_str(stk, pci, 3); - if ((ob = STRMPfilter(b, pat)) == NULL) { + if ((ob = STRMPfilter(b, s, pat)) == NULL) { BBPunfix(b->batCacheid); throw(MAL, "strimps.strimpfilter", SQLSTATE(HY002) "filtering failed"); } @@ -144,7 +148,7 @@ mel_func strimp_init_funcs[] = { /* String imprints */ // pattern("bat", "strimpNDigrams", PATstrimp_ndigrams, false, "count digrams in a string bat", args(1,2,arg("",lng),batarg("b",str))), // pattern("bat", "strimpHistogram", PATstrimp_makehist, false, "make a histogram of all the byte pairs in a BAT", args(2,3,arg("",lng), batarg("",lng),batarg("b",str))), - pattern("strimps", "mkstrimp", PATstrimpCreate, false, "construct the strimp a BAT", args(1,2,arg("",void),batarg("b",str))), + pattern("strimps", "mkstrimp", PATstrimpCreate, false, "construct the strimp a BAT", args(1,3,arg("",void),batarg("b",str),batarg("s",oid))), pattern("strimps", "strimpfilter", PATstrimpFilter, false, "", args(1,3,arg("",bit),arg("b",str),arg("q",str))), pattern("strimps", "strimpfilterselect", PATstrimpFilterSelect, false, "", args(1,5,batarg("",oid),batarg("b",str),batarg("s",oid),arg("q",str),arg("a",bit))), pattern("strimps", "strimpfilterjoin", PATstrimpFilter, 
false, "", args(2,8,batarg("",oid),batarg("b",str),arg("q",str))), diff --git a/sql/backends/monet5/sql_strimps.c b/sql/backends/monet5/sql_strimps.c --- a/sql/backends/monet5/sql_strimps.c +++ b/sql/backends/monet5/sql_strimps.c @@ -50,12 +50,14 @@ sql_load_bat(Client cntxt, MalBlkPtr mb, str sql_createstrimps(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { - BAT *b; + BAT *b, *s; if (sql_load_bat(cntxt, mb, stk, pci, &b) != MAL_SUCCEED) throw(SQL, "sql.createstrimps", SQLSTATE(HY002) OPERATION_FAILED); - if (STRMPcreate(b) != GDK_SUCCEED) + s = BATdense(0, 0, b->batCount); + + if (STRMPcreate(b, s) != GDK_SUCCEED) throw(SQL, "sql.createstrimps", SQLSTATE(HY002) OPERATION_FAILED); BBPunfix(b->batCacheid); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list