Changeset: 67bddbf2ee39 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=67bddbf2ee39 Modified Files: gdk/gdk_sample.c sql/backends/monet5/Tests/sample00.sql sql/backends/monet5/Tests/sample01.sql sql/backends/monet5/Tests/sample02.sql sql/backends/monet5/Tests/sample03.sql sql/server/rel_optimizer.c sql/server/rel_rel.c sql/server/rel_select.c Branch: stratified_sampling Log Message:
Don't optimize away columns that we need. diffs (198 lines): diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c --- a/gdk/gdk_sample.c +++ b/gdk/gdk_sample.c @@ -255,6 +255,8 @@ BATweightedsample(BAT *b, BUN n, BAT *w) oid minoid = b->hseqbase; + ERRORcheck(n > BATcount(b), "BATsample: Sample size bigger than table!", NULL); + BATcheck(b, "BATsample", NULL); BATcheck(w, "BATsample", NULL); @@ -312,6 +314,10 @@ BATweightedsample(BAT *b, BUN n, BAT *w) } oids[i] = (oid)(j + minoid); keys[i] = pow(mtwist_drand(mt_rng), 1.0 / w_ptr[j]);//TODO cast 1.0 to dbl? + if (keys[i] == 1) { + GDKerror("BATsample: weight overflow\n"); + goto bailout; + } i++; } diff --git a/sql/backends/monet5/Tests/sample00.sql b/sql/backends/monet5/Tests/sample00.sql --- a/sql/backends/monet5/Tests/sample00.sql +++ b/sql/backends/monet5/Tests/sample00.sql @@ -2,7 +2,6 @@ START TRANSACTION; CREATE TABLE vals(w DOUBLE, value INTEGER); - INSERT INTO vals VALUES (1, 100), (0, 50); SELECT * FROM vals SAMPLE 1 USING WEIGHTS w; @@ -15,7 +14,6 @@ ROLLBACK; START TRANSACTION; CREATE TABLE vals(w DECIMAL(10,3), value INTEGER); - INSERT INTO vals VALUES (10.77, 100), (0, 50); SELECT * FROM vals SAMPLE 1 USING WEIGHTS w; diff --git a/sql/backends/monet5/Tests/sample01.sql b/sql/backends/monet5/Tests/sample01.sql --- a/sql/backends/monet5/Tests/sample01.sql +++ b/sql/backends/monet5/Tests/sample01.sql @@ -5,7 +5,8 @@ CREATE TABLE vals(id DOUBLE, type INTEGE INSERT INTO vals VALUES (1, 100), (0, 50); -SELECT type, COUNT(*) FROM vals GROUP BY type SAMPLE 1 USING WEIGHTS cast(id AS double); +plan SELECT type FROM vals GROUP BY type SAMPLE 1 USING WEIGHTS id; +SELECT type FROM vals GROUP BY type SAMPLE 1 USING WEIGHTS id; ROLLBACK; diff --git a/sql/backends/monet5/Tests/sample02.sql b/sql/backends/monet5/Tests/sample02.sql --- a/sql/backends/monet5/Tests/sample02.sql +++ b/sql/backends/monet5/Tests/sample02.sql @@ -6,7 +6,7 @@ CREATE TABLE vals(id DOUBLE, type INTEGE INSERT INTO vals VALUES (1, 100), (0, 50); -SELECT SUM(id) FROM vals SAMPLE 1 USING WEIGHTS cast(type AS double); +SELECT SUM(id) FROM vals SAMPLE 1 USING WEIGHTS id; ROLLBACK; diff --git a/sql/backends/monet5/Tests/sample03.sql b/sql/backends/monet5/Tests/sample03.sql --- a/sql/backends/monet5/Tests/sample03.sql +++ b/sql/backends/monet5/Tests/sample03.sql @@ -8,6 +8,15 @@ INSERT INTO vals VALUES (1, 100), (-1, 5 SELECT * FROM vals SAMPLE 1 USING WEIGHTS w; +ROLLBACK; + +# fewer weights are non-zero than size of sample +START TRANSACTION; + +CREATE TABLE vals(w DOUBLE, value INTEGER); +INSERT INTO vals VALUES (1, 100), (0, 50); + +SELECT * FROM vals SAMPLE 2 USING WEIGHTS w; ROLLBACK; @@ -20,3 +29,45 @@ INSERT INTO vals VALUES ('#1 sample', 10 SELECT * FROM vals SAMPLE 1 USING WEIGHTS w; ROLLBACK; + +# sample size bigger than amount of elements +START TRANSACTION; + +CREATE TABLE vals(w DOUBLE, value INTEGER); +INSERT INTO vals VALUES (1, 100), (1, 50); + +SELECT * FROM vals SAMPLE 10 USING WEIGHTS w; + +ROLLBACK; + +# weight overflow +START TRANSACTION; + +CREATE TABLE vals(w DOUBLE, value INTEGER); +INSERT INTO vals VALUES (1.7976931348623157E+306, 100), (0, 50); + +SELECT * FROM vals SAMPLE 1 USING WEIGHTS w; + +ROLLBACK; + +# intigus maximus +START TRANSACTION; + +CREATE TABLE vals(w DOUBLE, value INTEGER); +INSERT INTO vals VALUES (1, 100), (0, 50); + + +SELECT * FROM vals SAMPLE 18446744073709551617 USING WEIGHTS w; + +ROLLBACK; + +# biggus samplus +START TRANSACTION; + +CREATE TABLE vals(w DOUBLE, value INTEGER); +INSERT INTO vals VALUES (1, 100), (0, 50); + + +SELECT * FROM vals SAMPLE 2147483648 USING WEIGHTS w; + +ROLLBACK; diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c --- a/sql/server/rel_optimizer.c +++ b/sql/server/rel_optimizer.c @@ -5783,8 +5783,13 @@ rel_mark_used(mvc *sql, sql_rel *rel, in } break; + case op_sample: + if (rel->exps->cnt == 2) { + // weighted sample + exps_mark_used(sql->sa, rel, rel->l); + } + /* fall through */ case op_topn: - case op_sample: if (proj) { rel = rel ->l; rel_mark_used(sql, rel, proj); diff --git a/sql/server/rel_rel.c b/sql/server/rel_rel.c --- a/sql/server/rel_rel.c +++ b/sql/server/rel_rel.c @@ -401,7 +401,7 @@ rel_crossproduct(sql_allocator *sa, sql_ } sql_rel * -rel_topn(sql_allocator *sa, sql_rel *l, list *exps ) +rel_topn(sql_allocator *sa, sql_rel *l, list *exps) { sql_rel *rel = rel_create(sa); @@ -415,7 +415,7 @@ rel_topn(sql_allocator *sa, sql_rel *l, } sql_rel * -rel_sample(sql_allocator *sa, sql_rel *l, list *exps ) +rel_sample(sql_allocator *sa, sql_rel *l, list *exps) { sql_rel *rel = rel_create(sa); diff --git a/sql/server/rel_select.c b/sql/server/rel_select.c --- a/sql/server/rel_select.c +++ b/sql/server/rel_select.c @@ -4716,6 +4716,11 @@ rel_select_exp(mvc *sql, sql_rel *rel, S exp_kind iek = {type_value, card_column, FALSE}; symbol* weights = l->h->next->data.sym; sql_exp* weights_exp = rel_value_exp(sql, &rel, weights, 0, iek); + if (!weights_exp) { + if (sql->errstr[0] == 0) + return sql_error(sql, 02, "Samplu failu"); + return NULL; + } if(l->h->type == type_lng) { lng sample_size = l->h->data.l_val; @@ -4737,6 +4742,11 @@ rel_select_exp(mvc *sql, sql_rel *rel, S append(exps, o); } rel = rel_sample(sql->sa, rel, exps); + if (!rel) { + if (sql->errstr[0] == 0) + return sql_error(sql, 02, "Samplu failu"); + return NULL; + } } if (rel) { if (rel && sn->groupby) { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list