MonetDB: default - Defense at overwriting error messages in the ...

2021-08-31 Thread Martin Kersten
Changeset: 985ea67a022c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/985ea67a022c
Modified Files:
monetdb5/mal/mal_instruction.c
tools/monetdbe/monetdbe.c
Branch: default
Log Message:

Defend against overwriting error messages in the mdbe structure.
The first error is the one that should be handled; in practice this
means that the application code should clear the error status before
issuing a new request.

The code can be checked to see whether the first message returned
always makes sense.


diffs (truncated from 860 to 300 lines):

diff --git a/monetdb5/mal/mal_instruction.c b/monetdb5/mal/mal_instruction.c
--- a/monetdb5/mal/mal_instruction.c
+++ b/monetdb5/mal/mal_instruction.c
@@ -1150,6 +1150,7 @@ defConstant(MalBlkPtr mb, int type, ValP
GDKfree(ft);
GDKfree(tt);
freeException(msg);
+   VALclear(cst);  // it could contain allocated space
return -1;
} else {
assert(cst->vtype == type);
diff --git a/tools/monetdbe/monetdbe.c b/tools/monetdbe/monetdbe.c
--- a/tools/monetdbe/monetdbe.c
+++ b/tools/monetdbe/monetdbe.c
@@ -295,7 +295,7 @@ monetdbe_get_results(monetdbe_result** r
monetdbe_result_internal* res_internal;
 
if (!(res_internal = GDKzalloc(sizeof(monetdbe_result_internal)))) {
-   mdbe->msg = createException(MAL, 
"monetdbe.monetdbe_get_results", MAL_MALLOC_FAIL);
+   set_error(mdbe, createException(MAL, 
"monetdbe.monetdbe_get_results", MAL_MALLOC_FAIL));
return mdbe->msg;
}
// TODO: set type of result outside.
@@ -314,7 +314,7 @@ monetdbe_get_results(monetdbe_result** r
if (!res_internal->converted_columns) {
GDKfree(res_internal);
*result = NULL;
-   mdbe->msg = createException(MAL, 
"monetdbe.monetdbe_get_results", MAL_MALLOC_FAIL);
+   set_error(mdbe, createException(MAL, 
"monetdbe.monetdbe_get_results", MAL_MALLOC_FAIL));
return mdbe->msg;
}
}
@@ -347,11 +347,11 @@ monetdbe_query_internal(monetdbe_databas
b = (backend *) c->sqlcontext;
 
if (!query) {
-   mdbe->msg = createException(MAL, 
"monetdbe.monetdbe_query_internal", "Query missing");
+   set_error(mdbe, createException(MAL, 
"monetdbe.monetdbe_query_internal", "Query missing"));
goto cleanup;
}
if (!(query_stream = buffer_rastream(&query_buf, "sqlstatement"))) {
-   mdbe->msg = createException(MAL, 
"monetdbe.monetdbe_query_internal", "Could not setup query stream");
+   set_error(mdbe, createException(MAL, 
"monetdbe.monetdbe_query_internal", "Could not setup query stream"));
goto cleanup;
}
input_query_len = strlen(query);
@@ -361,7 +361,7 @@ monetdbe_query_internal(monetdbe_databas
query_len += prep_len;
}
if (!(nq = GDKmalloc(query_len))) {
-   mdbe->msg = createException(MAL, 
"monetdbe.monetdbe_query_internal", MAL_MALLOC_FAIL);
+   set_error(mdbe, createException(MAL, 
"monetdbe.monetdbe_query_internal", MAL_MALLOC_FAIL));
goto cleanup;
}
if (prepare_id)
@@ -375,12 +375,12 @@ monetdbe_query_internal(monetdbe_databas
 
fdin_changed = true;
if (!(c->fdin = bstream_create(query_stream, query_len))) {
-   mdbe->msg = createException(MAL, 
"monetdbe.monetdbe_query_internal", "Could not setup query stream");
+   set_error(mdbe, createException(MAL, 
"monetdbe.monetdbe_query_internal", "Could not setup query stream"));
goto cleanup;
}
query_stream = NULL;
if (bstream_next(c->fdin) < 0) {
-   mdbe->msg = createException(MAL, 
"monetdbe.monetdbe_query_internal", "Internal error while starting the query");
+   set_error(mdbe, createException(MAL, 
"monetdbe.monetdbe_query_internal", "Internal error while starting the query"));
goto cleanup;
}
 
@@ -484,7 +484,7 @@ monetdbe_workers_internal(monetdbe_datab
int workers = 0;
if (opts && opts->nr_threads) {
if (opts->nr_threads < 0)
-   mdbe->msg = createException(MAL, 
"monetdbe.monetdbe_startup", "Nr_threads should be positive");
+   set_error(mdbe,createException(MAL, 
"monetdbe.monetdbe_startup", "Nr_threads should be positive"));
else
workers = GDKnr_threads = opts->nr_threads;
}
@@ -497,7 +497,7 @@ monetdbe_memory_internal(monetdbe_databa
int memory = 0;
if (opts && opts->memorylimit) {
if (opts->memorylimit < 0)
-   mdbe->msg = createException(MAL, 
"monetdbe.monetdbe_startup", "Memorylimit should be po

MonetDB: default - Free previous exception before overwriting

2021-08-31 Thread Pedro Ferreira
Changeset: 7084820ef155 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/7084820ef155
Modified Files:
monetdb5/mal/mal_interpreter.c
Branch: default
Log Message:

Free previous exception before overwriting


diffs (26 lines):

diff --git a/monetdb5/mal/mal_interpreter.c b/monetdb5/mal/mal_interpreter.c
--- a/monetdb5/mal/mal_interpreter.c
+++ b/monetdb5/mal/mal_interpreter.c
@@ -944,8 +944,10 @@ str runMALsequence(Client cntxt, MalBlkP
 
/* unknown exceptions lead to propagation */
if (exceptionVar == -1) {
-if (cntxt->querytimeout && mb->starttime && GDKusec()- 
mb->starttime > cntxt->querytimeout)
-ret= createException(MAL, "mal.interpreter", 
SQLSTATE(HYT00) RUNTIME_QRY_TIMEOUT);
+   if (cntxt->querytimeout && mb->starttime && 
GDKusec()- mb->starttime > cntxt->querytimeout) {
+   freeException(ret);
+   ret = createException(MAL, 
"mal.interpreter", SQLSTATE(HYT00) RUNTIME_QRY_TIMEOUT);
+   }
stkpc = mb->stop;
continue;
}
@@ -994,7 +996,8 @@ str runMALsequence(Client cntxt, MalBlkP
}
if (stkpc == mb->stop) {
if (cntxt->querytimeout && mb->starttime && 
GDKusec()- mb->starttime > cntxt->querytimeout){
-   ret= createException(MAL, 
"mal.interpreter", SQLSTATE(HYT00) RUNTIME_QRY_TIMEOUT);
+   freeException(ret);
+   ret = createException(MAL, 
"mal.interpreter", SQLSTATE(HYT00) RUNTIME_QRY_TIMEOUT);
stkpc = mb->stop;
}
continue;
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: Jul2021 - Don't leak optimizer error message

2021-08-31 Thread Pedro Ferreira
Changeset: 3a8a37d1da2b for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/3a8a37d1da2b
Modified Files:
monetdb5/optimizer/opt_wrapper.c
Branch: Jul2021
Log Message:

Don't leak optimizer error message


diffs (14 lines):

diff --git a/monetdb5/optimizer/opt_wrapper.c b/monetdb5/optimizer/opt_wrapper.c
--- a/monetdb5/optimizer/opt_wrapper.c
+++ b/monetdb5/optimizer/opt_wrapper.c
@@ -150,7 +150,9 @@ str OPTwrapper (Client cntxt, MalBlkPtr 
codes[i].timing += GDKusec() - clk;
codes[i].calls++;
if (msg) {
-   throw(MAL, codes[i].nme, SQLSTATE(42000) "Error 
in optimizer %s: %s", codes[i].nme, msg);
+   str newmsg = createException(MAL, codes[i].nme, 
SQLSTATE(42000) "Error in optimizer %s: %s", codes[i].nme, msg);
+   freeException(msg);
+   return newmsg;
}
break;
}
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: default - Merged with Jul2021

2021-08-31 Thread Pedro Ferreira
Changeset: adf48b960aff for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/adf48b960aff
Modified Files:
monetdb5/optimizer/opt_pushselect.c
monetdb5/optimizer/opt_support.c
monetdb5/optimizer/opt_support.h
Branch: default
Log Message:

Merged with Jul2021


diffs (29 lines):

diff --git a/sql/backends/monet5/UDF/pyapi3/pyapi3.c 
b/sql/backends/monet5/UDF/pyapi3/pyapi3.c
--- a/sql/backends/monet5/UDF/pyapi3/pyapi3.c
+++ b/sql/backends/monet5/UDF/pyapi3/pyapi3.c
@@ -185,7 +185,7 @@ static str PyAPIeval(Client cntxt, MalBl
int retcols;
bool gstate = 0;
int unnamedArgs = 0;
-   bool parallel_aggregation = grouped && mapped;
+   bool parallel_aggregation = grouped && mapped, freeexprStr = false;
int argcount = pci->argc;
 
char *eval_additional_args[] = {"_columns", "_column_types", "_conn"};
@@ -593,6 +593,7 @@ static str PyAPIeval(Client cntxt, MalBl
  
SQLSTATE(HY013) MAL_MALLOC_FAIL " function body string.");
goto wrapup;
}
+   freeexprStr = true;
if (fread(exprStr, 1, (size_t) length, fp) != (size_t) 
length) {
msg = createException(MAL, "pyapi3.eval",
  
SQLSTATE(PY000) "Failed to read from file \"%s\".",
@@ -1342,6 +1343,8 @@ wrapup:
GDKfree(args[i]);
GDKfree(args);
GDKfree(pycall);
+   if (freeexprStr)
+   GDKfree(exprStr);
 
return msg;
 }
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: Jul2021 - Use MAL namespace string in order for call to...

2021-08-31 Thread Pedro Ferreira
Changeset: ecdeafdb2aee for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ecdeafdb2aee
Modified Files:
monetdb5/optimizer/opt_pushselect.c
monetdb5/optimizer/opt_support.c
monetdb5/optimizer/opt_support.h
sql/backends/monet5/UDF/pyapi3/pyapi3.c
Branch: Jul2021
Log Message:

Use the MAL namespace string so that the call works. Also fixed a memory
leak in embedded Python.


diffs (65 lines):

diff --git a/monetdb5/optimizer/opt_pushselect.c 
b/monetdb5/optimizer/opt_pushselect.c
--- a/monetdb5/optimizer/opt_pushselect.c
+++ b/monetdb5/optimizer/opt_pushselect.c
@@ -149,7 +149,7 @@ OPTpushselectImplementation(Client cntxt
if( mb->errors)
return MAL_SUCCEED;
 
-   no_mito = !isOptimizerEnabled(mb, "mitosis");
+   no_mito = !isOptimizerEnabled(mb, mitosisRef);
(void) stk;
(void) pci;
vars= (int*) GDKzalloc(sizeof(int)* mb->vtop);
diff --git a/monetdb5/optimizer/opt_support.c b/monetdb5/optimizer/opt_support.c
--- a/monetdb5/optimizer/opt_support.c
+++ b/monetdb5/optimizer/opt_support.c
@@ -40,7 +40,7 @@ optimizerIsApplied(MalBlkPtr mb, str opt
  * requires inspection of the pipeline attached to a MAL block.
  */
 int
-isOptimizerEnabled(MalBlkPtr mb, str opt)
+isOptimizerEnabled(MalBlkPtr mb, const char *opt)
 {
int i;
InstrPtr q;
diff --git a/monetdb5/optimizer/opt_support.h b/monetdb5/optimizer/opt_support.h
--- a/monetdb5/optimizer/opt_support.h
+++ b/monetdb5/optimizer/opt_support.h
@@ -49,7 +49,7 @@ extern int isOrderby(InstrPtr q);
 extern int isSelect(InstrPtr q);
 extern int isSubJoin(InstrPtr q);
 extern int isMultiplex(InstrPtr q);
-extern int isOptimizerEnabled(MalBlkPtr mb, str opt);
+extern int isOptimizerEnabled(MalBlkPtr mb, const char *opt);
 extern int isOptimizerUsed(MalBlkPtr mb, str opt);
 
 #endif /* _OPT_SUPPORT_H */
diff --git a/sql/backends/monet5/UDF/pyapi3/pyapi3.c 
b/sql/backends/monet5/UDF/pyapi3/pyapi3.c
--- a/sql/backends/monet5/UDF/pyapi3/pyapi3.c
+++ b/sql/backends/monet5/UDF/pyapi3/pyapi3.c
@@ -185,7 +185,7 @@ static str PyAPIeval(Client cntxt, MalBl
int retcols;
bool gstate = 0;
int unnamedArgs = 0;
-   bool parallel_aggregation = grouped && mapped;
+   bool parallel_aggregation = grouped && mapped, freeexprStr = false;
int argcount = pci->argc;
 
char *eval_additional_args[] = {"_columns", "_column_types", "_conn"};
@@ -593,6 +593,7 @@ static str PyAPIeval(Client cntxt, MalBl
  
SQLSTATE(HY013) MAL_MALLOC_FAIL " function body string.");
goto wrapup;
}
+   freeexprStr = true;
if (fread(exprStr, 1, (size_t) length, fp) != (size_t) 
length) {
msg = createException(MAL, "pyapi3.eval",
  
SQLSTATE(PY000) "Failed to read from file \"%s\".",
@@ -1342,6 +1343,8 @@ wrapup:
GDKfree(args[i]);
GDKfree(args);
GDKfree(pycall);
+   if (freeexprStr)
+   GDKfree(exprStr);
 
return msg;
 }
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: default - Don't call sprintf on MAL optimizers. Overall...

2021-08-31 Thread Pedro Ferreira
Changeset: ab557e39932c for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/ab557e39932c
Modified Files:
clients/Tests/MAL-signatures.stable.out
clients/Tests/MAL-signatures.stable.out.int128
clients/Tests/exports.stable.out
monetdb5/extras/mal_optimizer_template/opt_sql_append.c
monetdb5/extras/rapi/Tests/rapi01.maltest
monetdb5/mal/Tests/tst024.maltest
monetdb5/mal/Tests/tst034.maltest
monetdb5/mal/Tests/tst059.maltest
monetdb5/mal/Tests/tst071.maltest
monetdb5/mal/Tests/tst163.maltest
monetdb5/mal/Tests/tst194.maltest
monetdb5/mal/Tests/tst195.maltest
monetdb5/mal/Tests/tst196.maltest
monetdb5/mal/Tests/tst220.maltest
monetdb5/mal/Tests/tst230.maltest
monetdb5/mal/Tests/tst231.maltest
monetdb5/mal/Tests/tst232.maltest
monetdb5/mal/mal_client.c
monetdb5/mal/mal_factory.c
monetdb5/mal/mal_function.c
monetdb5/mal/mal_instruction.c
monetdb5/mal/mal_instruction.h
monetdb5/mal/mal_listing.c
monetdb5/mal/mal_session.c
monetdb5/mal/mal_session.h
monetdb5/optimizer/opt_aliases.c
monetdb5/optimizer/opt_bincopyfrom.c
monetdb5/optimizer/opt_candidates.c
monetdb5/optimizer/opt_coercion.c
monetdb5/optimizer/opt_commonTerms.c
monetdb5/optimizer/opt_constants.c
monetdb5/optimizer/opt_costModel.c
monetdb5/optimizer/opt_dataflow.c
monetdb5/optimizer/opt_deadcode.c
monetdb5/optimizer/opt_emptybind.c
monetdb5/optimizer/opt_evaluate.c
monetdb5/optimizer/opt_fastpath.c
monetdb5/optimizer/opt_garbageCollector.c
monetdb5/optimizer/opt_generator.c
monetdb5/optimizer/opt_inline.c
monetdb5/optimizer/opt_jit.c
monetdb5/optimizer/opt_json.c
monetdb5/optimizer/opt_macro.c
monetdb5/optimizer/opt_mask.c
monetdb5/optimizer/opt_matpack.c
monetdb5/optimizer/opt_mergetable.c
monetdb5/optimizer/opt_mitosis.c
monetdb5/optimizer/opt_multiplex.c
monetdb5/optimizer/opt_oltp.c
monetdb5/optimizer/opt_pipes.c
monetdb5/optimizer/opt_postfix.c
monetdb5/optimizer/opt_prelude.c
monetdb5/optimizer/opt_prelude.h
monetdb5/optimizer/opt_profiler.c
monetdb5/optimizer/opt_projectionpath.c
monetdb5/optimizer/opt_pushselect.c
monetdb5/optimizer/opt_querylog.c
monetdb5/optimizer/opt_reduce.c
monetdb5/optimizer/opt_remap.c
monetdb5/optimizer/opt_remoteQueries.c
monetdb5/optimizer/opt_reorder.c
monetdb5/optimizer/opt_support.c
monetdb5/optimizer/opt_support.h
monetdb5/optimizer/opt_volcano.c
monetdb5/optimizer/opt_wlc.c
monetdb5/optimizer/opt_wrapper.c
monetdb5/optimizer/optimizer.c
sql/backends/monet5/sql_execute.c
sql/backends/monet5/sql_optimizer.c
Branch: default
Log Message:

Don't call sprintf on MAL optimizers. Overall cleanup


diffs (truncated from 3827 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out 
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -9110,6 +9110,8 @@
 [ "optimizer", "volcano_pipe", "function optimizer.volcano_pipe():void;",  
"", ""  ]
 [ "optimizer", "wlc",  "pattern optimizer.wlc():str ", "OPTwrapper;",  ""  
]
 [ "optimizer", "wlc",  "pattern optimizer.wlc(X_0:str, X_1:str):str ", 
"OPTwrapper;",  "Inject the workload capture-replay primitives" ]
+[ "optimizer", "wrapper",  "pattern optimizer.wrapper():str ", 
"OPTwrapper;",  ""  ]
+[ "optimizer", "wrapper",  "pattern optimizer.wrapper(X_0:str, 
X_1:str):str ", "OPTwrapper;",  "Fake optimizer"]
 [ "pcre",  "imatch",   "command pcre.imatch(X_0:str, X_1:str):bit ",   
"PCREimatch;",  "Caseless Perl Compatible Regular Expression pattern matching 
against a string" ]
 [ "pcre",  "index","command pcre.index(X_0:pcre, X_1:str):int ",   
"PCREindex;",   "match a pattern, return matched position (or 0 when not 
found)"]
 [ "pcre",  "match","command pcre.match(X_0:str, X_1:str):bit ",
"PCREmatch;",   "Perl Compatible Regular Expression pattern matching against a 
string"  ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 
b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -12410,6 +12410,8 @@
 [ "optimizer", "volcano_pipe", "function optimizer.volcano_pipe():void;",  
"", ""  ]
 [ "optimizer", "wlc",  "pattern optimizer.wlc():str ", "OPTwrapper;",  ""  
]
 [ "optimizer", "wlc",  "pattern optimizer.wlc(X_0:str, X_1:str):str ", 
"OPTwrapper;",  "Inject the workload capture-replay primitives" ]
+[ "optimizer", "wrapper",  

MonetDB: string_imprints - Fix reading strimps from the disk

2021-08-31 Thread Panagiotis Koutsourakis
Changeset: 1d265612c715 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/1d265612c715
Modified Files:
gdk/gdk_strimps.c
Branch: string_imprints
Log Message:

Fix reading strimps from the disk


diffs (44 lines):

diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
--- a/gdk/gdk_strimps.c
+++ b/gdk/gdk_strimps.c
@@ -423,25 +423,29 @@ BATcheckstrimps(BAT *b)
uint64_t npairs;
uint64_t hsize;
/* Read the 8 byte long strimp
-* descriptor and make sure that
-* the number of pairs is either
-* 32 or 64.
+* descriptor.
+*
+* NPAIRS must be 64 in the
+* current implementation.
+*
+* HSIZE must be between 200 and
+* 584 (inclusive): 8 bytes the
+* descritor, 64 bytes the pair
+* sizes and n*64 bytes the
+* actual pairs where 2 <= n <=
+* 8.
 */
if (read(fd, &desc, 8) == 8
&& (desc & 0xff) == STRIMP_VERSION
-   && ((npairs = NPAIRS(desc)) == 32 
|| npairs == 64)
-   && (hsize = HSIZE(desc)) >= 96 && 
hsize <= 640
+   && ((npairs = NPAIRS(desc)) == 64)
+   && (hsize = HSIZE(desc)) >= 200 && 
hsize <= 584
&& ((desc >> 32) & 0xff) == 1 /* 
check the persistence byte */
&& fstat(fd, &st) == 0
&& st.st_size >= (off_t) 
(hp->strimps.free = hp->strimps.size =
- /* 
descriptor */
- 8 +
- /* header 
size (offsets + pairs) */
+ /* header 
size (desc + offsets + pairs) */
  hsize +
- /* 
padding to 4 or 8 byte boundary */
- 
hsize%(npairs/8) == (npairs/8)? 0 : (npairs/8+((npairs/8) - hsize%(npairs/8))) +
  /* 
bitmasks */
- 
BATcount(b)*(npairs/8))
+ 
BATcount(b)*sizeof(uint64_t))
&& HEAPload(&hp->strimps, nme, 
"tstrimps", false) == GDK_SUCCEED) {
hp->sizes_base = (uint8_t 
*)hp->strimps.base + 8; /* sizes just after the descriptor */
hp->pairs_base = hp->sizes_base 
+ npairs; /* pairs just after the offsets */
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: string_imprints - Use correct address to bitstrings

2021-08-31 Thread Panagiotis Koutsourakis
Changeset: 97a1a209934a for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/97a1a209934a
Modified Files:
gdk/gdk_strimps.c
Branch: string_imprints
Log Message:

Use correct address to bitstrings

Specifically do not use the value of strimps.free because it is
updated by multiple threads.


diffs (12 lines):

diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
--- a/gdk/gdk_strimps.c
+++ b/gdk/gdk_strimps.c
@@ -655,7 +655,7 @@ STRMPcreate(BAT *b, BAT *s)
 if ((h = STRMPcreateStrimpHeap(pb, s)) == NULL) {
return GDK_FAIL;
}
-   dh = (uint64_t *)((uint8_t*)h->strimps.base + h->strimps.free + 
b->hseqbase*8);
+   dh = (uint64_t *)h->strimps_base + b->hseqbase;
 
ncand = canditer_init(&ci, b, s);
 
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: string_imprints - Persist strimp when it is fully created

2021-08-31 Thread Panagiotis Koutsourakis
Changeset: 7bd939f4ba52 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/7bd939f4ba52
Modified Files:
gdk/gdk_strimps.c
Branch: string_imprints
Log Message:

Persist strimp when it is fully created

A strimp is fully created when we have computed a bitstring for every
string in the bat.


diffs (32 lines):

diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
--- a/gdk/gdk_strimps.c
+++ b/gdk/gdk_strimps.c
@@ -621,8 +621,6 @@ persistStrimp(BAT *b)
TRC_DEBUG(ACCELERATOR, "persistStrimp(" ALGOBATFMT "): NOT 
persisting strimp\n", ALGOBATPAR(b));
 }
 
-static ATOMIC_TYPE STRMPnthread = ATOMIC_VAR_INIT(0);
-
 /* Create */
 gdk_return
 STRMPcreate(BAT *b, BAT *s)
@@ -643,7 +641,6 @@ STRMPcreate(BAT *b, BAT *s)
return GDK_FAIL;
}
 
-   (void)ATOMIC_INC(&STRMPnthread);
/* Disable this before merging to default */
 if (VIEWtparent(b)) {
pb = BBP_cache(VIEWtparent(b));
@@ -678,10 +675,8 @@ STRMPcreate(BAT *b, BAT *s)
MT_lock_unset(&b->batIdxLock);
 
/* The thread that reaches this point last needs to write the strimp to 
disk. */
-   (void)ATOMIC_DEC(&STRMPnthread);
-   if (STRMPnthread == 0) {
+   if (STRIMP_COMPLETE(pb))
persistStrimp(pb);
-   }
 
TRC_DEBUG(ACCELERATOR, "strimp creation took " LLFMT " usec\n", 
GDKusec()-t0);
return GDK_SUCCEED;
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: string_imprints - Code cleanup

2021-08-31 Thread Panagiotis Koutsourakis
Changeset: 3ca155710c3f for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/3ca155710c3f
Modified Files:
gdk/gdk_strimps.c
Branch: string_imprints
Log Message:

Code cleanup


diffs (88 lines):

diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
--- a/gdk/gdk_strimps.c
+++ b/gdk/gdk_strimps.c
@@ -77,8 +77,8 @@
 /* Macros for accessing metadada of a strimp. These are recorded in the
  * first 8 bytes of the heap.
  */
-#define NPAIRS(d) ((d) >> 8) & 0xff
-#define HSIZE(d) ((d) >> 16) & 0xffff
+#define NPAIRS(d) (((d) >> 8) & 0xff)
+#define HSIZE(d) (((d) >> 16) & 0xffff)
 
 #undef UTF8STRINGS /* Not using utf8 for now */
 #ifdef UTF8STRINGS
@@ -371,9 +371,10 @@ STRMPcreateStrimpHeap(BAT *b, BAT *s)
r->pairs_base = h2 = (uint8_t *)h1 + STRIMP_HEADER_SIZE;
 
for (i = 0; i < STRIMP_HEADER_SIZE; i++) {
-   *(h1 + i) = hpairs[i].psize;
-   memcpy(h2, hpairs[i].pbytes, hpairs[i].psize);
-   h2 += hpairs[i].psize;
+   uint8_t psize = hpairs[i].psize;
+   h1[i] = psize;
+   memcpy(h2, hpairs[i].pbytes, psize);
+   h2 += psize;
}
r->strimps_base = h2;
r->strimps.free = sz;
@@ -386,12 +387,20 @@ STRMPcreateStrimpHeap(BAT *b, BAT *s)
 return b->tstrimps;
 }
 
+#define STRIMP_COMPLETE(b) \
+   b->tstrimps != NULL &&\
+   (b->tstrimps->strimps.free - ((char *)b->tstrimps->strimps_base - 
b->tstrimps->strimps.base))/sizeof(uint64_t) == b->batCount
+
 static bool
 BATcheckstrimps(BAT *b)
 {
bool ret;
lng t = GDKusec();
 
+   if (b == NULL)
+   return false;
+
+   assert(b->batCacheid > 0);
if (b->tstrimps == (Strimps *)1) {
assert(!GDKinmemory(b->theap->farmid));
MT_lock_set(&b->batIdxLock);
@@ -422,7 +431,7 @@ BATcheckstrimps(BAT *b)
&& (desc & 0xff) == STRIMP_VERSION
&& ((npairs = NPAIRS(desc)) == 32 
|| npairs == 64)
&& (hsize = HSIZE(desc)) >= 96 && 
hsize <= 640
-   && ((desc & ((uint64_t)0xff << 32)) 
>> 32) == 1
+   && ((desc >> 32) & 0xff) == 1 /* 
check the persistence byte */
&& fstat(fd, &st) == 0
&& st.st_size >= (off_t) 
(hp->strimps.free = hp->strimps.size =
  /* 
descriptor */
@@ -460,12 +469,15 @@ BATcheckstrimps(BAT *b)
 * not null and the number of bitstrings is equal to the bat
 * count.
 */
-ret = b->tstrimps != NULL &&
-   (b->tstrimps->strimps.free - ((char *)b->tstrimps->strimps_base 
- b->tstrimps->strimps.base))/sizeof(uint64_t) == b->batCount;
-   if (ret)
-   TRC_DEBUG(ACCELERATOR, "BATcheckstrimps(" ALGOBATFMT "): 
already has strimps, waited " LLFMT " usec\n", ALGOBATPAR(b), GDKusec() - t);
+   assert(!b->tstrimps || (b->tstrimps->strimps.free - HSIZE(((uint64_t 
*)b->tstrimps->strimps.base)[0]))/sizeof(uint64_t) <= b->batCount);
+   ret = STRIMP_COMPLETE(b);
+if (ret) {
+   TRC_DEBUG(ACCELERATOR,
+ "BATcheckstrimps(" ALGOBATFMT "): already has 
strimps, waited " LLFMT " usec\n",
+ ALGOBATPAR(b), GDKusec() - t);
+   }
 
-   return ret;
+return ret;
 }
 
 /* Filter a BAT b using a string q. Return the result as a candidate
@@ -582,7 +594,7 @@ BATstrimpsync(void *arg)
failed = "";
}
}
-   TRC_DEBUG(ACCELERATOR, "BATstrimpsync(%s): strimps 
persisted"
+   TRC_DEBUG(ACCELERATOR, "BATstrimpsync(%s): strimp 
persisted"
  " (" LLFMT " usec)%s\n",
  BATgetId(b), GDKusec() - t0, failed);
}
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: string_imprints - Do not create strimps for small bats

2021-08-31 Thread Panagiotis Koutsourakis
Changeset: 2e33639ce402 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/2e33639ce402
Modified Files:
gdk/gdk_strimps.h
monetdb5/modules/mal/pcre.c
Branch: string_imprints
Log Message:

Do not create strimps for small bats


diffs (32 lines):

diff --git a/gdk/gdk_strimps.h b/gdk/gdk_strimps.h
--- a/gdk/gdk_strimps.h
+++ b/gdk/gdk_strimps.h
@@ -15,6 +15,7 @@
 #define STRIMP_VERSION (uint64_t)1
 #define STRIMP_HISTSIZE 256*256
 #define STRIMP_HEADER_SIZE 64
+#define STRIMP_CREATION_THRESHOLD 5000 /* do not create strimp for "small" 
BATs */
 
 typedef struct {
uint8_t *pbytes;
diff --git a/monetdb5/modules/mal/pcre.c b/monetdb5/modules/mal/pcre.c
--- a/monetdb5/modules/mal/pcre.c
+++ b/monetdb5/modules/mal/pcre.c
@@ -1886,13 +1886,14 @@ PCRElikeselect(bat *ret, const bat *bid,
 
assert(ATOMstorage(b->ttype) == TYPE_str);
 
-   if (use_strimps) {
+   if (use_strimps && BATcount(b) >= STRIMP_CREATION_THRESHOLD) {
if (STRMPcreate(b, NULL) == GDK_SUCCEED) {
BAT *tmp_s;
tmp_s = STRMPfilter(b, s, *pat);
-   if(s)
+   if (tmp_s && s) {
BBPunfix(s->batCacheid);
-   s = tmp_s;
+   s = tmp_s;
+   }
} /* If we cannot create the strimp just continue normally */
 
}
___
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


MonetDB: string_imprints - Merge with default

2021-08-31 Thread Panagiotis Koutsourakis
Changeset: 8d90a78cdb68 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/8d90a78cdb68
Modified Files:
gdk/gdk.h
gdk/gdk_bbp.c
gdk/gdk_private.h
monetdb5/optimizer/opt_prelude.c
monetdb5/optimizer/opt_prelude.h
sql/backends/monet5/sql.c
sql/scripts/CMakeLists.txt
Branch: string_imprints
Log Message:

Merge with default


diffs (truncated from 32345 to 300 lines):

diff --git a/MonetDB.spec b/MonetDB.spec
--- a/MonetDB.spec
+++ b/MonetDB.spec
@@ -527,7 +527,6 @@ exit 0
 %{_libdir}/monetdb5/lib_capi.so
 %endif
 %{_libdir}/monetdb5/lib_generator.so
-%{_libdir}/monetdb5/lib_udf.so
 %doc %{_mandir}/man1/mserver5.1.gz
 %dir %{_datadir}/doc/MonetDB
 %docdir %{_datadir}/doc/MonetDB
@@ -832,6 +831,7 @@ rm -f %{buildroot}%{_libdir}/monetdb5/ru
 rm -f %{buildroot}%{_libdir}/monetdb5/lib_run_*.so
 rm -f %{buildroot}%{_libdir}/monetdb5/microbenchmark.mal
 rm -f %{buildroot}%{_libdir}/monetdb5/lib_microbenchmark*.so
+rm -f %{buildroot}%{_libdir}/monetdb5/lib_udf*.so
 rm -f %{buildroot}%{_bindir}/monetdb_mtest.sh
 rm -rf %{buildroot}%{_datadir}/monetdb # /cmake
 
diff --git a/NT/mksqlwxs.py b/NT/mksqlwxs.py
--- a/NT/mksqlwxs.py
+++ b/NT/mksqlwxs.py
@@ -191,9 +191,9 @@ def main():
 print(r'')
 print(r'  ')
 id = comp(features, id, 16,
-  [r'lib\monetdb5\{}'.format(x) for x in sorted(filter(lambda x: 
x.startswith('_') and x.endswith('.dll') and ('geom' not in x) and ('pyapi' not 
in x) and ('opt_sql_append' not in x) and ('run_' not in x) and 
('microbenchmark' not in x), os.listdir(os.path.join(sys.argv[3], 'lib', 
'monetdb5'])
+  [r'lib\monetdb5\{}'.format(x) for x in sorted(filter(lambda x: 
x.startswith('_') and x.endswith('.dll') and ('geom' not in x) and ('pyapi' not 
in x) and ('opt_sql_append' not in x) and ('run_' not in x) and 
('microbenchmark' not in x) and ('udf' not in x), 
os.listdir(os.path.join(sys.argv[3], 'lib', 'monetdb5'])
 id = comp(debug, id, 16,
-  [r'lib\monetdb5\{}'.format(x) for x in sorted(filter(lambda x: 
x.startswith('_') and x.endswith('.pdb') and ('geom' not in x) and 
('opt_sql_append' not in x) and ('run_' not in x) and ('microbenchmark' not in 
x), os.listdir(os.path.join(sys.argv[3], 'lib', 'monetdb5'])
+  [r'lib\monetdb5\{}'.format(x) for x in sorted(filter(lambda x: 
x.startswith('_') and x.endswith('.pdb') and ('geom' not in x) and 
('opt_sql_append' not in x) and ('run_' not in x) and ('microbenchmark' not in 
x) and ('udf' not in x), os.listdir(os.path.join(sys.argv[3], 'lib', 
'monetdb5'])
 id = comp(geom, id, 16,
   [r'lib\monetdb5\{}'.format(x) for x in sorted(filter(lambda x: 
x.startswith('_') and (x.endswith('.dll') or x.endswith('.pdb')) and ('geom' in 
x), os.listdir(os.path.join(sys.argv[3], 'lib', 'monetdb5'])
 id = comp(pyapi3, id, 16,
diff --git a/clients/Tests/MAL-signatures.stable.out 
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -1,697 +1,694 @@
-stdout of test 'MAL-signatures` in directory 'clients` itself:
-
-#select * from sys.malfunctions() order by module, "function", address, 
signature, comment;
 % .%1, .%1,.%1,.%1,.%1 # table_name
 % module,  function,   signature,  address,comment # name
 % clob,clob,   clob,   clob,   clob # type
-% 12,  28, 313,42, 0 # length
-[ "aggr",  "all",  "command aggr.all(X_0:bat[:any_1]):any_1 ", 
"SQLall;",  ""  ]
-[ "aggr",  "allnotequal",  "pattern aggr.allnotequal(X_0:bat[:any_1], 
X_1:bat[:any_1]):bit ",  "SQLallnotequal;",  ""  ]
+% 12,  28, 313,42, 860 # length
+[ "aggr",  "all",  "command aggr.all(X_0:bat[:any_1]):any_1 ", 
"SQLall;",  "if all values in b are equal return this, else nil"]
+[ "aggr",  "allnotequal",  "pattern aggr.allnotequal(X_0:bat[:any_1], 
X_1:bat[:any_1]):bit ",  "SQLallnotequal;",  "if all values in r are 
not equal to l return true, else if r has nil nil else false"   ]
 [ "aggr",  "anyequal", "pattern aggr.anyequal(X_0:any_1, 
X_1:any_1):bit ", "CMDvarEQ;",""  ]
-[ "aggr",  "anyequal", "pattern aggr.anyequal(X_0:bat[:any_1], 
X_1:bat[:any_1]):bit ", "SQLanyequal;", ""  ]
-[ "aggr",  "avg",  "command aggr.avg(X_0:bat[:bte], X_1:bat[:oid], 
X_2:bat[:any_1]):bat[:dbl] ",   "AGGRavg13_dbl;",   ""  ]
-[ "aggr",  "avg",  "command aggr.avg(X_0:bat[:dbl], X_1:bat[:oid], 
X_2:bat[:any_1]):bat[:dbl] ",   "AGGRavg13_dbl;",   ""  ]
-[ "aggr",  "avg",  "command aggr.avg(X_0:bat[:flt], X_1:bat[:oid], 
X_2:bat[:any_1]):bat[:dbl] ",   "AGGRavg13_dbl;",   ""  ]
-[ "aggr",  "avg",  "command aggr.avg(X_0:bat[:int], X_1:bat[:oid], 
X_2:bat[:any_1]):bat[:dbl] ",   "AGGRavg13_dbl;",   ""  ]
-[ "aggr",  "avg",  "command aggr.avg(X_0:bat[:ln