Changeset: 5f325c009606 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5f325c009606 Added Files: gdk/xoshiro256starstar.h sql/test/sample/Tests/All sql/test/sample/Tests/sample.1.sql sql/test/sample/Tests/sample.1.stable.err sql/test/sample/Tests/sample.1.stable.out sql/test/sample/Tests/sample.10.sql sql/test/sample/Tests/sample.10.stable.err sql/test/sample/Tests/sample.10.stable.out sql/test/sample/Tests/sample.11.sql sql/test/sample/Tests/sample.11.stable.err sql/test/sample/Tests/sample.11.stable.out sql/test/sample/Tests/sample.12.sql sql/test/sample/Tests/sample.12.stable.err sql/test/sample/Tests/sample.12.stable.out sql/test/sample/Tests/sample.13.sql sql/test/sample/Tests/sample.13.stable.err sql/test/sample/Tests/sample.13.stable.out sql/test/sample/Tests/sample.14.sql sql/test/sample/Tests/sample.14.stable.err sql/test/sample/Tests/sample.14.stable.out sql/test/sample/Tests/sample.15.sql sql/test/sample/Tests/sample.15.stable.err sql/test/sample/Tests/sample.15.stable.out sql/test/sample/Tests/sample.2.sql sql/test/sample/Tests/sample.2.stable.err sql/test/sample/Tests/sample.2.stable.out sql/test/sample/Tests/sample.3.sql sql/test/sample/Tests/sample.3.stable.err sql/test/sample/Tests/sample.3.stable.out sql/test/sample/Tests/sample.4.sql sql/test/sample/Tests/sample.4.stable.err sql/test/sample/Tests/sample.4.stable.out sql/test/sample/Tests/sample.5.sql sql/test/sample/Tests/sample.5.stable.err sql/test/sample/Tests/sample.5.stable.out sql/test/sample/Tests/sample.6.sql sql/test/sample/Tests/sample.6.stable.err sql/test/sample/Tests/sample.6.stable.out sql/test/sample/Tests/sample.7.sql sql/test/sample/Tests/sample.7.stable.err sql/test/sample/Tests/sample.7.stable.out sql/test/sample/Tests/sample.8.sql sql/test/sample/Tests/sample.8.stable.err sql/test/sample/Tests/sample.8.stable.out sql/test/sample/Tests/sample.9.sql sql/test/sample/Tests/sample.9.stable.err sql/test/sample/Tests/sample.9.stable.out Modified Files: clients/Tests/MAL-signatures.stable.out clients/Tests/MAL-signatures.stable.out.int128 clients/Tests/exports.stable.out gdk/Makefile.ag gdk/gdk.h gdk/gdk_logger.c gdk/gdk_logger.h gdk/gdk_sample.c monetdb5/modules/mal/sample.c monetdb5/modules/mal/sample.h monetdb5/modules/mal/sample.mal sql/ChangeLog sql/backends/monet5/rel_bin.c sql/backends/monet5/sql_statement.c sql/backends/monet5/sql_statement.h sql/server/rel_select.c sql/server/sql_parser.h sql/server/sql_parser.y sql/server/sql_scan.c sql/test/BugTracker/Tests/drop_sequence_crash.SF-1505973.sql sql/test/BugTracker/Tests/drop_sequence_crash_2.SF-1505973.sql Branch: rename-sql Log Message:
Merge with default. diffs (truncated from 2344 to 300 lines): diff --git a/clients/Tests/MAL-signatures.stable.out b/clients/Tests/MAL-signatures.stable.out --- a/clients/Tests/MAL-signatures.stable.out +++ b/clients/Tests/MAL-signatures.stable.out @@ -10897,8 +10897,10 @@ Ready. [ "sabaoth", "marchScenario", "command sabaoth.marchScenario(lang:str):void ", "SABmarchScenario;", "Publishes the given language as available for this server" ] [ "sabaoth", "prelude", "command sabaoth.prelude():void ", "SABprelude;", "Initialise the sabaoth module" ] [ "sabaoth", "retreatScenario", "command sabaoth.retreatScenario(lang:str):void ", "SABretreatScenario;", "Unpublishes the given language as available for this server" ] -[ "sample", "subuniform", "command sample.subuniform(b:bat[:any], s:lng):bat[:oid] ", "SAMPLEuniform;", "Returns the oids of a uniform sample of size s" ] -[ "sample", "subuniform", "command sample.subuniform(b:bat[:any], p:dbl):bat[:oid] ", "SAMPLEuniform_dbl;", "Returns the oids of a uniform sample of size = (p x count(b)), where 0 <= p <= 1.0" ] +[ "sample", "subuniform", "pattern sample.subuniform(b:bat[:any], p:dbl):bat[:oid] ", "SAMPLEuniform;", "Returns the oids of a uniform sample of size = (p x count(b)), where 0 <= p <= 1.0" ] +[ "sample", "subuniform", "pattern sample.subuniform(b:bat[:any], p:dbl, sample_seed:int):bat[:oid] ", "SAMPLEuniform;", "Returns the oids of a uniform sample of size = (p x count(b)), where 0 <= p <= 1.0 and where the prg is seeded with sample_seed" ] +[ "sample", "subuniform", "pattern sample.subuniform(b:bat[:any], sample_size:lng):bat[:oid] ", "SAMPLEuniform;", "Returns the oids of a uniform sample of size s" ] +[ "sample", "subuniform", "pattern sample.subuniform(b:bat[:any], sample_size:lng, sample_seed:int):bat[:oid] ", "SAMPLEuniform;", "Returns the oids of a uniform sample of size s and where the prg is seeded with sample_seed" ] [ "shp", "attach", "pattern shp.attach(filename:str):void ", "SHPattach;", "Register an ESRI Shapefile in the vault catalog" ] [ "shp", "import", "pattern shp.import(fileid:int):void ", "SHPimport;", "Import an ESRI Shapefile with given id into the vault" ] [ "shp", "import", "pattern shp.import(fileid:int, po:wkb):void ", "SHPpartialimport;", "Partially import an ESRI Shapefile with given id into the vault" ] diff --git a/clients/Tests/MAL-signatures.stable.out.int128 b/clients/Tests/MAL-signatures.stable.out.int128 --- a/clients/Tests/MAL-signatures.stable.out.int128 +++ b/clients/Tests/MAL-signatures.stable.out.int128 @@ -15112,8 +15112,10 @@ Ready. [ "sabaoth", "marchScenario", "command sabaoth.marchScenario(lang:str):void ", "SABmarchScenario;", "Publishes the given language as available for this server" ] [ "sabaoth", "prelude", "command sabaoth.prelude():void ", "SABprelude;", "Initialise the sabaoth module" ] [ "sabaoth", "retreatScenario", "command sabaoth.retreatScenario(lang:str):void ", "SABretreatScenario;", "Unpublishes the given language as available for this server" ] -[ "sample", "subuniform", "command sample.subuniform(b:bat[:any], s:lng):bat[:oid] ", "SAMPLEuniform;", "Returns the oids of a uniform sample of size s" ] -[ "sample", "subuniform", "command sample.subuniform(b:bat[:any], p:dbl):bat[:oid] ", "SAMPLEuniform_dbl;", "Returns the oids of a uniform sample of size = (p x count(b)), where 0 <= p <= 1.0" ] +[ "sample", "subuniform", "pattern sample.subuniform(b:bat[:any], p:dbl):bat[:oid] ", "SAMPLEuniform;", "Returns the oids of a uniform sample of size = (p x count(b)), where 0 <= p <= 1.0" ] +[ "sample", "subuniform", "pattern sample.subuniform(b:bat[:any], p:dbl, sample_seed:int):bat[:oid] ", "SAMPLEuniform;", "Returns the oids of a uniform sample of size = (p x count(b)), where 0 <= p <= 1.0 and where the prg is seeded with sample_seed" ] +[ "sample", "subuniform", "pattern sample.subuniform(b:bat[:any], sample_size:lng):bat[:oid] ", "SAMPLEuniform;", "Returns the oids of a uniform sample of size s" ] +[ "sample", "subuniform", "pattern sample.subuniform(b:bat[:any], sample_size:lng, sample_seed:int):bat[:oid] ", "SAMPLEuniform;", "Returns the oids of a uniform sample of size s and where the prg is seeded with sample_seed" ] [ "shp", "attach", "pattern shp.attach(filename:str):void ", "SHPattach;", "Register an ESRI Shapefile in the vault catalog" ] [ "shp", "import", "pattern shp.import(fileid:int):void ", "SHPimport;", "Import an ESRI Shapefile with given id into the vault" ] [ "shp", "import", "pattern shp.import(fileid:int, po:wkb):void ", "SHPpartialimport;", "Partially import an ESRI Shapefile with given id into the vault" ] diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -164,6 +164,7 @@ gdk_return BATreplace(BAT *b, BAT *p, BA void BATrmprop(BAT *b, int idx); gdk_return BATroles(BAT *b, const char *tnme); BAT *BATsample(BAT *b, BUN n); +BAT *BATsample_with_seed(BAT *b, BUN n, unsigned seed); BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, bool li, bool hi, bool anti); gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); gdk_return BATsetaccess(BAT *b, int mode); @@ -1731,8 +1732,7 @@ str SABmarchConnection(void *ret, str *h str SABmarchScenario(void *ret, str *lang); str SABprelude(void *ret); str SABretreatScenario(void *ret, str *lang); -str SAMPLEuniform(bat *r, bat *b, lng *s); -str SAMPLEuniform_dbl(bat *r, bat *b, dbl *p); +str SAMPLEuniform(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str SERVERbindBAT(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); str SERVERclient(void *res, const Stream *In, const Stream *Out); str SERVERconnect(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pc); diff --git a/gdk/Makefile.ag b/gdk/Makefile.ag --- a/gdk/Makefile.ag +++ b/gdk/Makefile.ag @@ -25,7 +25,7 @@ lib_gdk = { gdk_qsort.c gdk_qsort_impl.h \ gdk_storage.c gdk_bat.c \ gdk_delta.c gdk_cross.c gdk_system.c gdk_value.c \ - gdk_posix.c gdk_logger.c gdk_sample.c \ + gdk_posix.c gdk_logger.c gdk_sample.c xoshiro256starstar.h \ gdk_private.h gdk_delta.h gdk_logger.h gdk_posix.h \ gdk_system.h gdk_system_private.h gdk_tm.h gdk_storage.h \ gdk_group.c \ diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2763,6 +2763,7 @@ gdk_export gdk_return BATfirstn(BAT **to * */ gdk_export BAT *BATsample(BAT *b, BUN n); +gdk_export BAT *BATsample_with_seed(BAT *b, BUN n, unsigned seed); /* * diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c --- a/gdk/gdk_logger.c +++ b/gdk/gdk_logger.c @@ -339,10 +339,9 @@ static gdk_return log_write_id(logger *l, char tpe, oid id) { lng lid = id; - if (lid < 0 || lid > 1000000) - assert(0); + assert(lid >= 0); if (mnstr_writeChr(l->log, tpe) && - mnstr_writeLng(l->log, id)) + mnstr_writeLng(l->log, lid)) return GDK_SUCCEED; fprintf(stderr, "!ERROR: log_write_id: write failed\n"); return GDK_FAIL; @@ -422,8 +421,9 @@ log_read_updates(logger *lg, trans *tr, for (i = 0; i < tr->nr; i++) { if (tr->changes[i].type == LOG_CREATE && - ((!tpe && strcmp(tr->changes[i].name, name) == 0) || - (tpe && tr->changes[i].tpe == tpe && tr->changes[i].cid == id))) { + (tpe == 0 + ? strcmp(tr->changes[i].name, name) == 0 + : tr->changes[i].tpe == tpe && tr->changes[i].cid == id)) { ht = tr->changes[i].ht; if (ht < 0) { ht = TYPE_void; @@ -646,6 +646,7 @@ la_bat_destroy(logger *lg, logaction *la return GDK_FAIL; if ((p = log_find(lg->snapshots_bid, lg->dsnapshots, bid)) != BUN_NONE) { + oid pos = (oid) p; #ifndef NDEBUG assert(BBP_desc(bid)->batRole == PERSISTENT); assert(0 <= BBP_desc(bid)->theap.farmid && BBP_desc(bid)->theap.farmid < MAXFARMS); @@ -655,7 +656,7 @@ la_bat_destroy(logger *lg, logaction *la assert(BBPfarms[BBP_desc(bid)->tvheap->farmid].roles & (1 << PERSISTENT)); } #endif - if (BUNappend(lg->dsnapshots, &p, false) != GDK_SUCCEED) + if (BUNappend(lg->dsnapshots, &pos, false) != GDK_SUCCEED) return GDK_FAIL; } } @@ -1784,7 +1785,6 @@ logger_load(int debug, const char *fn, c goto error; } - /* TODO add upgrade code !! */ snprintf(bak, sizeof(bak), "%s_catalog_tpe", fn); catalog_tpe = BBPindex(bak); t = BATdescriptor(catalog_tpe); @@ -2797,8 +2797,7 @@ log_bat_transient(logger *lg, const char } if (log_write_format(lg, &l) != GDK_SUCCEED || - (tpe && log_write_id(lg, tpe, id) != GDK_SUCCEED) || - (!tpe && log_write_string(lg, name) != GDK_SUCCEED)) { + (tpe ? log_write_id(lg, tpe, id) : log_write_string(lg, name)) != GDK_SUCCEED) { fprintf(stderr, "!ERROR: log_bat_transient: write failed\n"); return GDK_FAIL; } @@ -2833,8 +2832,7 @@ log_delta(logger *lg, BAT *uid, BAT *uva l.flag = (tpe)?LOG_UPDATE_ID:LOG_UPDATE; if (log_write_format(lg, &l) != GDK_SUCCEED || - (tpe && log_write_id(lg, tpe, id) != GDK_SUCCEED) || - (!tpe && log_write_string(lg, name) != GDK_SUCCEED)) + (tpe ? log_write_id(lg, tpe, id) : log_write_string(lg, name)) != GDK_SUCCEED) return GDK_FAIL; for (p = 0; p < BUNlast(uid) && ok == GDK_SUCCEED; p++) { @@ -2876,8 +2874,7 @@ log_bat(logger *lg, BAT *b, const char * l.flag = tpe?LOG_INSERT_ID:LOG_INSERT; if (log_write_format(lg, &l) != GDK_SUCCEED || - (tpe && log_write_id(lg, tpe, id) != GDK_SUCCEED) || - (!tpe && log_write_string(lg, name) != GDK_SUCCEED)) + (tpe ? log_write_id(lg, tpe, id) : log_write_string(lg, name)) != GDK_SUCCEED) return GDK_FAIL; if (b->ttype > TYPE_void && @@ -2919,8 +2916,7 @@ log_bat_clear(logger *lg, const char *na l.flag = (tpe)?LOG_CLEAR_ID:LOG_CLEAR; if (log_write_format(lg, &l) != GDK_SUCCEED || - (tpe && log_write_id(lg, tpe, id) != GDK_SUCCEED) || - (!tpe && log_write_string(lg, name) != GDK_SUCCEED)) + (tpe ? log_write_id(lg, tpe, id) : log_write_string(lg, name)) != GDK_SUCCEED) return GDK_FAIL; if (lg->debug & 1) @@ -3187,6 +3183,7 @@ gdk_return logger_add_bat(logger *lg, BAT *b, const char *name, char tpe, oid id) { log_bid bid = logger_find_bat(lg, name, tpe, id); + lng lid = (lng) id; assert(b->batRestricted > 0 || b == lg->snapshots_bid || @@ -3217,7 +3214,7 @@ logger_add_bat(logger *lg, BAT *b, const if (BUNappend(lg->catalog_bid, &bid, false) != GDK_SUCCEED || BUNappend(lg->catalog_nme, name, false) != GDK_SUCCEED || BUNappend(lg->catalog_tpe, &tpe, false) != GDK_SUCCEED || - BUNappend(lg->catalog_oid, &id, false) != GDK_SUCCEED) + BUNappend(lg->catalog_oid, &lid, false) != GDK_SUCCEED) return GDK_FAIL; BBPretain(bid); return GDK_SUCCEED; @@ -3229,13 +3226,14 @@ logger_upgrade_bat(logger *lg, const cha log_bid bid = logger_find_bat(lg, name, tpe, id); if (bid) { - BUN p = log_find(lg->catalog_bid, lg->dcatalog, bid); + oid p = (oid) log_find(lg->catalog_bid, lg->dcatalog, bid); + lng lid = (lng) id; if (BUNappend(lg->dcatalog, &p, false) != GDK_SUCCEED || BUNappend(lg->catalog_bid, &bid, false) != GDK_SUCCEED || BUNappend(lg->catalog_nme, name, false) != GDK_SUCCEED || BUNappend(lg->catalog_tpe, &tpe, false) != GDK_SUCCEED || - BUNappend(lg->catalog_oid, &id, false) != GDK_SUCCEED) + BUNappend(lg->catalog_oid, &lid, false) != GDK_SUCCEED) return GDK_FAIL; } return GDK_SUCCEED; @@ -3246,6 +3244,7 @@ logger_del_bat(logger *lg, log_bid bid) { BAT *b = BATdescriptor(bid); BUN p = log_find(lg->catalog_bid, lg->dcatalog, bid), q; + oid pos; assert(p != BUN_NONE); if (p == BUN_NONE) { @@ -3258,8 +3257,8 @@ logger_del_bat(logger *lg, log_bid bid) * transient */ if (p >= lg->catalog_bid->batInserted && (q = log_find(lg->snapshots_bid, lg->dsnapshots, bid)) != BUN_NONE) { - - if (BUNappend(lg->dsnapshots, &q, false) != GDK_SUCCEED) { + pos = (oid) q; + if (BUNappend(lg->dsnapshots, &pos, false) != GDK_SUCCEED) { logbat_destroy(b); return GDK_FAIL; } @@ -3283,7 +3282,8 @@ logger_del_bat(logger *lg, log_bid bid) lg->changes += BATcount(b) + 1; BBPunfix(b->batCacheid); } - return BUNappend(lg->dcatalog, &p, false); + pos = (oid) p; + return BUNappend(lg->dcatalog, &pos, false); /*assert(BBP_lrefs(bid) == 0);*/ } @@ -3306,7 +3306,8 @@ logger_find_bat(logger *lg, const char * BUN p; if (BAThash(lg->catalog_oid) == GDK_SUCCEED) { - HASHloop_lng(cni, cni.b->thash, p, &id) { + lng lid = (lng) id; + HASHloop_lng(cni, cni.b->thash, p, &lid) { oid pos = p; if (*(char*)Tloc(lg->catalog_tpe, p) == tpe) { if (BUNfnd(lg->dcatalog, &pos) == BUN_NONE) diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c --- a/gdk/gdk_sample.c +++ b/gdk/gdk_sample.c @@ -26,6 +26,7 @@ #include "monetdb_config.h" #include "gdk.h" #include "gdk_private.h" +#include "xoshiro256starstar.h" #undef BATsample @@ -108,7 +109,7 @@ OIDTreeToBATAntiset(struct oidtreenode * /* BATsample implements sampling for void headed BATs */ BAT * -BATsample(BAT *b, BUN n) +BATsample_with_seed(BAT *b, BUN n, unsigned seed) { BAT *bn; BUN cnt, slen; @@ -128,6 +129,9 @@ BATsample(BAT *b, BUN n) } else { oid minoid = b->hseqbase; oid maxoid = b->hseqbase + cnt; + random_state_engine rse; + + /* if someone samples more than half of our tree, we * do the antiset */ bool antiset = n > cnt / 2; @@ -144,12 +148,17 @@ BATsample(BAT *b, BUN n) GDKfree(tree); return NULL; } + + init_random_state_engine(&rse, seed); + /* while we do not have enough sample OIDs yet */ for (rescnt = 0; rescnt < n; rescnt++) { oid candoid; do { + double random_double = next_double(rse); + /* generate a new random OID */ - candoid = (oid) (minoid + DRAND * (maxoid - minoid)); + candoid = (oid) (minoid + random_double * (maxoid - minoid)); /* if that candidate OID was already * generated, try again */ } while (!OIDTreeMaybeInsert(tree, candoid, rescnt)); @@ -172,3 +181,11 @@ BATsample(BAT *b, BUN n) ALGOBATPAR(b), n, ALGOOPTBATPAR(bn)); return bn; } + +BAT * +BATsample(BAT *b, BUN n) +{ + unsigned some_random_seed = (unsigned) rand(); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list