Changeset: 139143c85939 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/139143c85939
Modified Files:
        gdk/gdk_strimps.c
        gdk/gdk_strimps.h
        monetdb5/modules/mal/strimps.c
        sql/backends/monet5/sql_strimps.c
Branch: string_imprints
Log Message:

strimp creation and filtering should work with candidates

This commit changes the interface of the GDK functions and how they are called.


diffs (190 lines):

diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
--- a/gdk/gdk_strimps.c
+++ b/gdk/gdk_strimps.c
@@ -238,10 +238,10 @@ STRMPchoosePairs(PairHistogramElem *hist
 }
 
 static bool
-STRMPbuildHeader(BAT *b, CharPair *hpairs) {
+STRMPbuildHeader(BAT *b, BAT *s, CharPair *hpairs) {
        lng t0 = 0;
        BATiter bi;
-       str s;
+       str cs;
        BUN i;
        size_t hidx;
        size_t hlen;
@@ -249,6 +249,8 @@ STRMPbuildHeader(BAT *b, CharPair *hpair
        PairIterator pi, *pip;
        CharPair cp, *cpp;
 
+       (void)s;
+
        TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
        hlen = STRIMP_HISTSIZE;
        if ((hist = (PairHistogramElem *)GDKmalloc(hlen*sizeof(PairHistogramElem))) == NULL) {
@@ -266,9 +268,9 @@ STRMPbuildHeader(BAT *b, CharPair *hpair
        pip = &pi;
        cpp = &cp;
        for (i = 0; i < b->batCount; i++) {
-               s = (str)BUNtvar(bi, i);
-               if (!strNil(s)) {
-                       pi.s = s;
+               cs = (str)BUNtvar(bi, i);
+               if (!strNil(cs)) {
+                       pi.s = cs;
                        pi.pos = 0;
                        pi.lim = strlen(pi.s);
                        while (pair_at(pip, cpp)) {
@@ -324,7 +326,7 @@ STRMPbuildHeader(BAT *b, CharPair *hpair
 
 /* Create the heap for a string imprint. Returns NULL on failure. */
 static Strimps *
-STRMPcreateStrimpHeap(BAT *b)
+STRMPcreateStrimpHeap(BAT *b, BAT *s)
 {
        uint8_t *h1, *h2;
        Strimps *r = NULL;
@@ -338,7 +340,7 @@ STRMPcreateStrimpHeap(BAT *b)
                MT_lock_set(&b->batIdxLock);
                /* Make sure no other thread got here first */
                 if (b->tstrimps == NULL) {
-                       STRMPbuildHeader(b, hpairs); /* Find the header pairs */
+                       STRMPbuildHeader(b, s, hpairs); /* Find the header pairs */
                        sz = 8 + STRIMP_HEADER_SIZE; /* add 8-bytes for the descriptor and the pair sizes */
                        for (i = 0; i < STRIMP_HEADER_SIZE; i++) {
                                sz += hpairs[i].psize;
@@ -459,13 +461,14 @@ BATcheckstrimps(BAT *b)
  * list.
  */
 BAT *
-STRMPfilter(BAT *b, char *q)
+STRMPfilter(BAT *b, BAT *s, char *q)
 {
        BAT *r = NULL;
        BUN i;
        uint64_t qbmask;
        uint64_t *ptr;
        Strimps *strmps;
+       (void)s;
 
        if (isVIEW(b)) {
                // b = BBP_cache(VIEWtparent(b));
@@ -583,12 +586,12 @@ static ATOMIC_TYPE STRMPnthread = ATOMIC
 
 /* Create */
 gdk_return
-STRMPcreate(BAT *b)
+STRMPcreate(BAT *b, BAT *s)
 {
        lng t0 = 0;
        BATiter bi;
        BUN i;
-       str s;
+       str cs;
        Strimps *h;
        uint64_t *dh;
        BAT *pb;
@@ -611,16 +614,16 @@ STRMPcreate(BAT *b)
        if (BATcheckstrimps(pb))
                return GDK_SUCCEED;
 
-        if ((h = STRMPcreateStrimpHeap(pb)) == NULL) {
+        if ((h = STRMPcreateStrimpHeap(pb, s)) == NULL) {
                return GDK_FAIL;
        }
        dh = (uint64_t *)((uint8_t*)h->strimps.base + h->strimps.free + b->hseqbase*8);
 
        bi = bat_iterator(b);
        for (i = 0; i < bi.count; i++) {
-               s = (str)BUNtvar(bi, i);
-               if (!strNil(s))
-                       *dh++ = STRMPmakebitstring(s, h);
+               cs = (str)BUNtvar(bi, i);
+               if (!strNil(cs))
+                       *dh++ = STRMPmakebitstring(cs, h);
                else
                        *dh++ = 0; /* no pairs in nil values */
        }
diff --git a/gdk/gdk_strimps.h b/gdk/gdk_strimps.h
--- a/gdk/gdk_strimps.h
+++ b/gdk/gdk_strimps.h
@@ -42,6 +42,6 @@ typedef struct {
 // gdk_export gdk_return STRMPmakehistogramBP(BAT *b, uint64_t *hist, size_t 
hist_size, size_t *nbins); // make static
 // gdk_export gdk_return STRMP_make_header(StrimpHeader *h, uint64_t *hist, 
size_t hist_size); // make static
 // gdk_export gdk_return STRMP_make_header(BAT *b);
-gdk_export gdk_return STRMPcreate(BAT *b);
-gdk_export BAT *STRMPfilter(BAT *b, char *q);
+gdk_export gdk_return STRMPcreate(BAT *b, BAT *s);
+gdk_export BAT *STRMPfilter(BAT *b, BAT *s, char *q);
 #endif /* _GDK_STRIMPS_H_ */
diff --git a/monetdb5/modules/mal/strimps.c b/monetdb5/modules/mal/strimps.c
--- a/monetdb5/modules/mal/strimps.c
+++ b/monetdb5/modules/mal/strimps.c
@@ -79,8 +79,8 @@ PATstrimp_makehist(Client cntxt, MalBlkP
 static str
 PATstrimpCreate(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
-       bat bid;
-       BAT *b;
+       bat bid, sid;
+       BAT *b, *s;
        (void)cntxt;
        (void)mb;
 
@@ -88,7 +88,11 @@ PATstrimpCreate(Client cntxt, MalBlkPtr 
        if ((b = BATdescriptor(bid)) == NULL)
                throw(MAL, "strimps.strimpCreate", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
 
-       if(STRMPcreate(b) != GDK_SUCCEED)
+       sid = *getArgReference_bat(stk, pci, 2);
+       if ((s = BATdescriptor(sid)) == NULL)
+               throw(MAL, "strimps.strimpCreate", SQLSTATE(HY002) 
RUNTIME_OBJECT_MISSING);
+
+       if(STRMPcreate(b, s) != GDK_SUCCEED)
                throw(MAL, "strimps.strimpCreate", SQLSTATE(HY002) 
OPERATION_FAILED);
 
        // *getArgReference_lng(stk, pci, 0) = 0;
@@ -124,11 +128,11 @@ PATstrimpFilterSelect(Client cntxt, MalB
 
        assert(s->ttype == TYPE_void);
 
-       if(STRMPcreate(b) != GDK_SUCCEED)
+       if(STRMPcreate(b, s) != GDK_SUCCEED)
                throw(MAL, "strimps.strimpfilter", SQLSTATE(HY002) "strimp 
creation failed");
 
        pat = *getArgReference_str(stk, pci, 3);
-       if ((ob = STRMPfilter(b, pat)) == NULL) {
+       if ((ob = STRMPfilter(b, s, pat)) == NULL) {
                BBPunfix(b->batCacheid);
                throw(MAL, "strimps.strimpfilter", SQLSTATE(HY002) "filtering 
failed");
        }
@@ -144,7 +148,7 @@ mel_func strimp_init_funcs[] = {
  /* String imprints */
  // pattern("bat", "strimpNDigrams", PATstrimp_ndigrams, false, "count digrams 
in a string bat", args(1,2,arg("",lng),batarg("b",str))),
  // pattern("bat", "strimpHistogram", PATstrimp_makehist, false, "make a 
histogram of all the byte pairs in a BAT", args(2,3,arg("",lng), 
batarg("",lng),batarg("b",str))),
- pattern("strimps", "mkstrimp", PATstrimpCreate, false, "construct the strimp a BAT", args(1,2,arg("",void),batarg("b",str))),
+ pattern("strimps", "mkstrimp", PATstrimpCreate, false, "construct the strimp a BAT", args(1,3,arg("",void),batarg("b",str),batarg("s",oid))),
  pattern("strimps", "strimpfilter", PATstrimpFilter, false, "", 
args(1,3,arg("",bit),arg("b",str),arg("q",str))),
  pattern("strimps", "strimpfilterselect", PATstrimpFilterSelect, false, "", 
args(1,5,batarg("",oid),batarg("b",str),batarg("s",oid),arg("q",str),arg("a",bit))),
  pattern("strimps", "strimpfilterjoin", PATstrimpFilter, false, "", 
args(2,8,batarg("",oid),batarg("b",str),arg("q",str))),
diff --git a/sql/backends/monet5/sql_strimps.c b/sql/backends/monet5/sql_strimps.c
--- a/sql/backends/monet5/sql_strimps.c
+++ b/sql/backends/monet5/sql_strimps.c
@@ -50,12 +50,14 @@ sql_load_bat(Client cntxt, MalBlkPtr mb,
 str
 sql_createstrimps(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
-       BAT *b;
+       BAT *b, *s;
 
        if (sql_load_bat(cntxt, mb, stk, pci, &b) != MAL_SUCCEED)
                throw(SQL, "sql.createstrimps", SQLSTATE(HY002) 
OPERATION_FAILED);
 
-       if (STRMPcreate(b) != GDK_SUCCEED)
+       s = BATdense(0, 0, b->batCount);
+
+       if (STRMPcreate(b, s) != GDK_SUCCEED)
                throw(SQL, "sql.createstrimps", SQLSTATE(HY002) 
OPERATION_FAILED);
 
        BBPunfix(b->batCacheid);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to