Changeset: 67bddbf2ee39 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=67bddbf2ee39
Modified Files:
        gdk/gdk_sample.c
        sql/backends/monet5/Tests/sample00.sql
        sql/backends/monet5/Tests/sample01.sql
        sql/backends/monet5/Tests/sample02.sql
        sql/backends/monet5/Tests/sample03.sql
        sql/server/rel_optimizer.c
        sql/server/rel_rel.c
        sql/server/rel_select.c
Branch: stratified_sampling
Log Message:

Don't optimize away columns that we need.


diffs (198 lines):

diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c
--- a/gdk/gdk_sample.c
+++ b/gdk/gdk_sample.c
@@ -255,6 +255,8 @@ BATweightedsample(BAT *b, BUN n, BAT *w)
 
        oid minoid = b->hseqbase;
 
+       ERRORcheck(n > BATcount(b), "BATsample: Sample size bigger than 
table!", NULL);
+
        BATcheck(b, "BATsample", NULL);
        BATcheck(w, "BATsample", NULL);
 
@@ -312,6 +314,10 @@ BATweightedsample(BAT *b, BUN n, BAT *w)
                }
                oids[i] = (oid)(j + minoid);
                keys[i] = pow(mtwist_drand(mt_rng), 1.0 / w_ptr[j]);//TODO cast 
1.0 to dbl?
+               if (keys[i] == 1) {
+                       GDKerror("BATsample: weight overflow\n");
+                       goto bailout;
+               }
                i++;
        }
 
diff --git a/sql/backends/monet5/Tests/sample00.sql 
b/sql/backends/monet5/Tests/sample00.sql
--- a/sql/backends/monet5/Tests/sample00.sql
+++ b/sql/backends/monet5/Tests/sample00.sql
@@ -2,7 +2,6 @@
 START TRANSACTION;
 
 CREATE TABLE vals(w DOUBLE, value INTEGER);
-
 INSERT INTO vals VALUES (1, 100), (0, 50);
 
 SELECT * FROM vals SAMPLE 1 USING WEIGHTS w;
@@ -15,7 +14,6 @@ ROLLBACK;
 START TRANSACTION;
 
 CREATE TABLE vals(w DECIMAL(10,3), value INTEGER);
-
 INSERT INTO vals VALUES (10.77, 100), (0, 50);
 
 SELECT * FROM vals SAMPLE 1 USING WEIGHTS w;
diff --git a/sql/backends/monet5/Tests/sample01.sql 
b/sql/backends/monet5/Tests/sample01.sql
--- a/sql/backends/monet5/Tests/sample01.sql
+++ b/sql/backends/monet5/Tests/sample01.sql
@@ -5,7 +5,8 @@ CREATE TABLE vals(id DOUBLE, type INTEGE
 
 INSERT INTO vals VALUES (1, 100), (0, 50);
 
-SELECT type, COUNT(*) FROM vals GROUP BY type SAMPLE 1 USING WEIGHTS cast(id 
AS double);
+plan SELECT type FROM vals GROUP BY type SAMPLE 1 USING WEIGHTS id;
+SELECT type FROM vals GROUP BY type SAMPLE 1 USING WEIGHTS id;
 
 ROLLBACK;
 
diff --git a/sql/backends/monet5/Tests/sample02.sql 
b/sql/backends/monet5/Tests/sample02.sql
--- a/sql/backends/monet5/Tests/sample02.sql
+++ b/sql/backends/monet5/Tests/sample02.sql
@@ -6,7 +6,7 @@ CREATE TABLE vals(id DOUBLE, type INTEGE
 
 INSERT INTO vals VALUES (1, 100), (0, 50);
 
-SELECT SUM(id) FROM vals SAMPLE 1 USING WEIGHTS cast(type AS double);
+SELECT SUM(id) FROM vals SAMPLE 1 USING WEIGHTS id;
 
 ROLLBACK;
 
diff --git a/sql/backends/monet5/Tests/sample03.sql 
b/sql/backends/monet5/Tests/sample03.sql
--- a/sql/backends/monet5/Tests/sample03.sql
+++ b/sql/backends/monet5/Tests/sample03.sql
@@ -8,6 +8,15 @@ INSERT INTO vals VALUES (1, 100), (-1, 5
 
 SELECT * FROM vals SAMPLE 1 USING WEIGHTS w;
 
+ROLLBACK;
+
+# fewer weights are non-zero than size of sample
+START TRANSACTION;
+
+CREATE TABLE vals(w DOUBLE, value INTEGER);
+INSERT INTO vals VALUES (1, 100), (0, 50);
+
+SELECT * FROM vals SAMPLE 2 USING WEIGHTS w;
 
 ROLLBACK;
 
@@ -20,3 +29,45 @@ INSERT INTO vals VALUES ('#1 sample', 10
 SELECT * FROM vals SAMPLE 1 USING WEIGHTS w;
 
 ROLLBACK;
+
+# sample size bigger than amount of elements
+START TRANSACTION;
+
+CREATE TABLE vals(w DOUBLE, value INTEGER);
+INSERT INTO vals VALUES (1, 100), (1, 50);
+
+SELECT * FROM vals SAMPLE 10 USING WEIGHTS w;
+
+ROLLBACK;
+
+# weight overflow
+START TRANSACTION;
+
+CREATE TABLE vals(w DOUBLE, value INTEGER);
+INSERT INTO vals VALUES (1.7976931348623157E+306, 100), (0, 50);
+
+SELECT * FROM vals SAMPLE 1 USING WEIGHTS w;
+
+ROLLBACK;
+
+# intigus maximus
+START TRANSACTION;
+
+CREATE TABLE vals(w DOUBLE, value INTEGER);
+INSERT INTO vals VALUES (1, 100), (0, 50);
+
+
+SELECT * FROM vals SAMPLE 18446744073709551617 USING WEIGHTS w;
+
+ROLLBACK;
+
+# biggus samplus
+START TRANSACTION;
+
+CREATE TABLE vals(w DOUBLE, value INTEGER);
+INSERT INTO vals VALUES (1, 100), (0, 50);
+
+
+SELECT * FROM vals SAMPLE 2147483648 USING WEIGHTS w;
+
+ROLLBACK;
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -5783,8 +5783,13 @@ rel_mark_used(mvc *sql, sql_rel *rel, in
                }
                break;
 
+       case op_sample:
+               if (rel->exps->cnt == 2) {
+                       // weighted sample
+                       exps_mark_used(sql->sa, rel, rel->l);
+               }
+               /* fall through */
        case op_topn:
-       case op_sample:
                if (proj) {
                        rel = rel ->l;
                        rel_mark_used(sql, rel, proj);
diff --git a/sql/server/rel_rel.c b/sql/server/rel_rel.c
--- a/sql/server/rel_rel.c
+++ b/sql/server/rel_rel.c
@@ -401,7 +401,7 @@ rel_crossproduct(sql_allocator *sa, sql_
 }
 
 sql_rel *
-rel_topn(sql_allocator *sa, sql_rel *l, list *exps )
+rel_topn(sql_allocator *sa, sql_rel *l, list *exps)
 {
        sql_rel *rel = rel_create(sa);
 
@@ -415,7 +415,7 @@ rel_topn(sql_allocator *sa, sql_rel *l, 
 }
 
 sql_rel *
-rel_sample(sql_allocator *sa, sql_rel *l, list *exps )
+rel_sample(sql_allocator *sa, sql_rel *l, list *exps)
 {
        sql_rel *rel = rel_create(sa);
 
diff --git a/sql/server/rel_select.c b/sql/server/rel_select.c
--- a/sql/server/rel_select.c
+++ b/sql/server/rel_select.c
@@ -4716,6 +4716,11 @@ rel_select_exp(mvc *sql, sql_rel *rel, S
                        exp_kind iek = {type_value, card_column, FALSE};
                        symbol* weights = l->h->next->data.sym;
                        sql_exp* weights_exp = rel_value_exp(sql, &rel, 
weights, 0, iek);
+                       if (!weights_exp) {
+                               if (sql->errstr[0] == 0)
+                                       return sql_error(sql, 02, "Samplu 
failu");
+                               return NULL;
+                       }
 
                        if(l->h->type == type_lng) {
                                lng sample_size = l->h->data.l_val;
@@ -4737,6 +4742,11 @@ rel_select_exp(mvc *sql, sql_rel *rel, S
                        append(exps, o);
                }
                rel = rel_sample(sql->sa, rel, exps);
+               if (!rel) {
+                       if (sql->errstr[0] == 0)
+                               return sql_error(sql, 02, "Samplu failu");
+                       return NULL;
+               }
        }
        if (rel) {
                if (rel && sn->groupby) {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to