Changeset: 13d3324595d4 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/13d3324595d4 Modified Files: monetdb5/modules/mal/pcre.c Branch: default Log Message:
Modernize pcre likeselect loop. Still more optimizations can be done here diffs (truncated from 320 to 300 lines): diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c --- a/monetdb5/modules/mal/pcre.c +++ b/monetdb5/modules/mal/pcre.c @@ -1714,45 +1714,27 @@ BATPCREnotlike(Client cntxt, MalBlkPtr m return BATPCRElike_imp(cntxt, mb, stk, pci, esc, ci, &yes); } -/* these two defines are copies from gdk_select.c */ - -/* scan select loop with candidates */ -#define candscanloop(TEST) \ - do { \ - TRC_DEBUG(ALGO, \ - "BATselect(b=%s#"BUNFMT",s=%s,anti=%d): " \ - "scanselect %s\n", BATgetId(b), BATcount(b), \ - s ? BATgetId(s) : "NULL", anti, #TEST); \ - for (p = 0; p < ci.ncand; p++) { \ - o = canditer_next(&ci); \ - r = (BUN) (o - off); \ - v = BUNtvar(bi, r); \ - if (TEST) \ - if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED) { \ - msg = createException(MAL, "pcre.likeselect", OPERATION_FAILED); \ - goto bailout; \ - } \ - } \ - } while (0) - -/* scan select loop without candidates */ -#define scanloop(TEST) \ - do { \ - TRC_DEBUG(ALGO, \ - "BATselect(b=%s#"BUNFMT",s=%s,anti=%d): " \ - "scanselect %s\n", BATgetId(b), BATcount(b), \ - s ? BATgetId(s) : "NULL", anti, #TEST); \ - while (p < q) { \ - v = BUNtvar(bi, p-off); \ - if (TEST) { \ - o = (oid) p; \ - if (bunfastappTYPE(oid, bn, &o) != GDK_SUCCEED) { \ - msg = createException(MAL, "pcre.likeselect", OPERATION_FAILED); \ - goto bailout; \ - } \ - } \ - p++; \ - } \ +/* scan select loop with or without candidates */ +#define pcrescanloop(TEST) \ + do { \ + TRC_DEBUG(ALGO, \ + "PCREselect(b=%s#"BUNFMT",anti=%d): " \ + "scanselect %s\n", BATgetId(b), BATcount(b), \ + anti, #TEST); \ + if (!s || BATtdense(s)) { \ + for (; p < q; p++) { \ + const char *restrict v = BUNtvar(bi, p - off); \ + if (TEST) \ + vals[cnt++] = p; \ + } \ + } else { \ + for (; p < ncands; p++) { \ + oid o = canditer_next(ci); \ + const char *restrict v = BUNtvar(bi, o - off); \ + if (TEST) \ + vals[cnt++] = o; \ + } \ + } \ } while (0) #ifdef HAVE_LIBPCRE @@ -1762,7 +1744,7 @@ BATPCREnotlike(Client cntxt, MalBlkPtr m #endif static str -pcre_likeselect(BAT **bnp, BAT *b, BAT *s, const char *pat, bool caseignore, bool anti) +pcre_likeselect(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q, BUN *rcnt, const char *pat, bool caseignore, bool anti) { #ifdef HAVE_LIBPCRE pcre *re = NULL; @@ -1772,158 +1754,66 @@ pcre_likeselect(BAT **bnp, BAT *b, BAT * void *ex = NULL; #endif BATiter bi = bat_iterator(b); - BAT *bn; - BUN p, q, r; - oid o, off = b->hseqbase; - const char *v; + BUN cnt = 0, ncands = ci->ncand; + oid off = b->hseqbase, *restrict vals = Tloc(bn, 0); str msg = MAL_SUCCEED; - struct canditer ci; - canditer_init(&ci, b, s); - - if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) { - msg = createException(MAL, "pcre.likeselect", SQLSTATE(HY013) MAL_MALLOC_FAIL); - goto bailout; - } - if ((msg = pcre_like_build(&re, &ex, pat, caseignore, ci.ncand)) != MAL_SUCCEED) + if ((msg = pcre_like_build(&re, &ex, pat, caseignore, ci->ncand)) != MAL_SUCCEED) goto bailout; - if (s && !BATtdense(s)) { - if (anti) - candscanloop(v && *v != '\200' && !PCRE_LIKESELECT_BODY); - else - candscanloop(v && *v != '\200' && PCRE_LIKESELECT_BODY); - } else { - if (s) { - assert(BATtdense(s)); - p = (BUN) s->tseqbase; - q = p + BATcount(s); - if ((oid) p < b->hseqbase) - p = b->hseqbase; - if ((oid) q > b->hseqbase + BATcount(b)) - q = b->hseqbase + BATcount(b); - } else { - p = off; - q = BUNlast(b) + off; - } - - if (anti) - scanloop(v && *v != '\200' && !PCRE_LIKESELECT_BODY); - else - scanloop(v && *v != '\200' && PCRE_LIKESELECT_BODY); - } + if (anti) + pcrescanloop(v && *v != '\200' && !PCRE_LIKESELECT_BODY); + else + pcrescanloop(v && *v != '\200' && PCRE_LIKESELECT_BODY); bailout: pcre_clean(&re, &ex); - if (bn && !msg) { - BATsetcount(bn, BATcount(bn)); /* set some properties */ - bn->tsorted = true; - bn->trevsorted = bn->batCount <= 1; - bn->tkey = true; - bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? * (oid *) Tloc(bn, 0) : oid_nil; - } - *bnp = bn; + *rcnt = cnt; return msg; } static str -re_likeselect(BAT **bnp, BAT *b, BAT *s, const char *pat, bool caseignore, bool anti, bool use_strcmp, uint32_t esc) +re_likeselect(BAT *bn, BAT *b, BAT *s, struct canditer *ci, BUN p, BUN q, BUN *rcnt, const char *pat, bool caseignore, bool anti, bool use_strcmp, uint32_t esc) { BATiter bi = bat_iterator(b); - BAT *bn = NULL; - BUN p, q, r; - oid o, off = b->hseqbase; - const char *v; + BUN cnt = 0, ncands = ci->ncand; + oid off = b->hseqbase, *restrict vals = Tloc(bn, 0); struct RE *re = NULL; uint32_t *wpat = NULL; str msg = MAL_SUCCEED; - struct canditer ci; - canditer_init(&ci, b, s); - - if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) { - msg = createException(MAL, "pcre.likeselect", SQLSTATE(HY013) MAL_MALLOC_FAIL); - goto bailout; - } if ((msg = re_like_build(&re, &wpat, pat, caseignore, use_strcmp, esc)) != MAL_SUCCEED) goto bailout; - if (s && !BATtdense(s)) { - if (use_strcmp) { - if (caseignore) { - if (anti) - candscanloop(v && *v != '\200' && mywstrcasecmp(v, wpat) != 0); - else - candscanloop(v && *v != '\200' && mywstrcasecmp(v, wpat) == 0); - } else { - if (anti) - candscanloop(v && *v != '\200' && strcmp(v, pat) != 0); - else - candscanloop(v && *v != '\200' && strcmp(v, pat) == 0); - } + if (use_strcmp) { + if (caseignore) { + if (anti) + pcrescanloop(v && *v != '\200' && mywstrcasecmp(v, wpat) != 0); + else + pcrescanloop(v && *v != '\200' && mywstrcasecmp(v, wpat) == 0); } else { - if (caseignore) { - if (anti) - candscanloop(v && *v != '\200' && !re_match_ignore(v, re)); - else - candscanloop(v && *v != '\200' && re_match_ignore(v, re)); - } else { - if (anti) - candscanloop(v && *v != '\200' && !re_match_no_ignore(v, re)); - else - candscanloop(v && *v != '\200' && re_match_no_ignore(v, re)); - } + if (anti) + pcrescanloop(v && *v != '\200' && strcmp(v, pat) != 0); + else + pcrescanloop(v && *v != '\200' && strcmp(v, pat) == 0); } } else { - if (s) { - assert(BATtdense(s)); - p = (BUN) s->tseqbase; - q = p + BATcount(s); - if ((oid) p < b->hseqbase) - p = b->hseqbase; - if ((oid) q > b->hseqbase + BATcount(b)) - q = b->hseqbase + BATcount(b); + if (caseignore) { + if (anti) + pcrescanloop(v && *v != '\200' && !re_match_ignore(v, re)); + else + pcrescanloop(v && *v != '\200' && re_match_ignore(v, re)); } else { - p = off; - q = BUNlast(b) + off; - } - if (use_strcmp) { - if (caseignore) { - if (anti) - scanloop(v && *v != '\200' && mywstrcasecmp(v, wpat) != 0); - else - scanloop(v && *v != '\200' && mywstrcasecmp(v, wpat) == 0); - } else { - if (anti) - scanloop(v && *v != '\200' && strcmp(v, pat) != 0); - else - scanloop(v && *v != '\200' && strcmp(v, pat) == 0); - } - } else { - if (caseignore) { - if (anti) - scanloop(v && *v != '\200' && !re_match_ignore(v, re)); - else - scanloop(v && *v != '\200' && re_match_ignore(v, re)); - } else { - if (anti) - scanloop(v && *v != '\200' && !re_match_no_ignore(v, re)); - else - scanloop(v && *v != '\200' && re_match_no_ignore(v, re)); - } + if (anti) + pcrescanloop(v && *v != '\200' && !re_match_no_ignore(v, re)); + else + pcrescanloop(v && *v != '\200' && re_match_no_ignore(v, re)); } } bailout: re_like_clean(&re, &wpat); - if (bn && !msg) { - BATsetcount(bn, BATcount(bn)); /* set some properties */ - bn->tsorted = true; - bn->trevsorted = bn->batCount <= 1; - bn->tkey = true; - bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? * (oid *) Tloc(bn, 0) : oid_nil; - } - *bnp = bn; + *rcnt = cnt; return msg; } @@ -1951,13 +1841,46 @@ PCRElikeselect(bat *ret, const bat *bid, MT_thread_setalgorithm(empty ? "pcrelike: trivially empty" : use_strcmp ? "pcrelike: pattern matching using strcmp" : use_re ? "pcrelike: pattern matching using RE" : "pcrelike: pattern matching using pcre"); - if (use_re) { - msg = re_likeselect(&bn, b, s, *pat, (bool) *caseignore, (bool) *anti, use_strcmp, (unsigned char) **esc); - } else if (empty) { + if (empty) { if (!(bn = BATdense(0, 0, 0))) msg = createException(MAL, "algebra.likeselect", SQLSTATE(HY013) MAL_MALLOC_FAIL); } else { - msg = pcre_likeselect(&bn, b, s, ppat, (bool) *caseignore, (bool) *anti); + BUN p = 0, q = 0, rcnt = 0; + struct canditer ci; + + canditer_init(&ci, b, s); + if (!(bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT))) { + msg = createException(MAL, "algebra.likeselect", SQLSTATE(HY013) MAL_MALLOC_FAIL); + goto bailout; + } + + if (!s || BATtdense(s)) { + if (s) { + assert(BATtdense(s)); + p = (BUN) s->tseqbase; + q = p + BATcount(s); + if ((oid) p < b->hseqbase) + p = b->hseqbase; + if ((oid) q > b->hseqbase + BATcount(b)) + q = b->hseqbase + BATcount(b); + } else { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list