Changeset: 78eca92f6d68 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=78eca92f6d68
Modified Files:
        sql/benchmarks/tpch/Tests/16-explain.stable.out
        sql/benchmarks/tpch/Tests/16-plan.stable.out
        sql/server/rel_optimizer.c
Branch: Jan2014
Log Message:

rewrite group by distinct


diffs (185 lines):

diff --git a/sql/benchmarks/tpch/Tests/16-explain.stable.out 
b/sql/benchmarks/tpch/Tests/16-explain.stable.out
--- a/sql/benchmarks/tpch/Tests/16-explain.stable.out
+++ b/sql/benchmarks/tpch/Tests/16-explain.stable.out
@@ -123,36 +123,41 @@ function user.s2_1{autoCommit=true}(A0:s
     X_105:bat[:oid,:int]  := algebra.leftfetchjoinPath(X_97,X_87,X_104);
     (X_106,r1_121) := algebra.join(X_85,X_105);
     X_108 := algebra.tdiff(X_86,X_106);
-    X_196 := algebra.leftfetchjoin(X_108,r1_83);
-    X_109:bat[:oid,:str]  := algebra.leftfetchjoinPath(X_196,X_70,X_42);
-    X_110:bat[:oid,:str]  := algebra.leftfetchjoinPath(X_196,X_70,X_51);
-    X_111:bat[:oid,:int]  := algebra.leftfetchjoinPath(X_196,X_70,X_35);
-    (X_112,r1_133,r2_133) := group.subgroup(X_111);
-    (X_115,r1_136,r2_136) := group.subgroup(X_110,X_112);
-    (X_118,r1_139,r2_139) := group.subgroupdone(X_109,X_115);
-    X_121 := algebra.leftfetchjoin(r1_139,X_111);
-    X_122 := algebra.leftfetchjoin(r1_139,X_109);
-    X_123 := algebra.leftfetchjoin(r1_139,X_110);
-    X_124 := algebra.leftfetchjoin(X_108,X_85);
-    (X_125,r1_146,r2_146) := group.subgroupdone(X_124,X_118);
-    X_128 := algebra.leftfetchjoin(r1_146,X_124);
-    X_129 := algebra.leftfetchjoin(r1_146,X_118);
-    X_130:bat[:oid,:wrd]  := aggr.subcount(X_128,X_129,r1_139,true);
-    (X_132,r1_153,r2_153) := algebra.subsort(X_130,true,false);
-    (X_136,r1_157,r2_157) := algebra.subsort(X_123,r1_153,r2_153,false,false);
-    (X_139,r1_160,r2_160) := algebra.subsort(X_122,r1_157,r2_157,false,false);
-    (X_142,r1_163,r2_163) := algebra.subsort(X_121,r1_160,r2_160,false,false);
-    X_145 := algebra.leftfetchjoin(r1_163,X_123);
-    X_148 := algebra.leftfetchjoin(r1_163,X_130);
-    X_147 := algebra.leftfetchjoin(r1_163,X_121);
-    X_146 := algebra.leftfetchjoin(r1_163,X_122);
-    X_149 := sql.resultSet(4,1,X_145);
-    sql.rsColumn(X_149,"sys.part","p_brand","varchar",10,0,X_145);
-    sql.rsColumn(X_149,"sys.part","p_type","varchar",25,0,X_146);
-    sql.rsColumn(X_149,"sys.part","p_size","int",32,0,X_147);
-    sql.rsColumn(X_149,"sys.L4","supplier_cnt","wrd",64,0,X_148);
-    X_170 := io.stdout();
-    sql.exportResult(X_170,X_149);
+    X_207 := algebra.leftfetchjoin(X_108,r1_83);
+    X_109:bat[:oid,:str]  := algebra.leftfetchjoinPath(X_207,X_70,X_42);
+    X_110:bat[:oid,:str]  := algebra.leftfetchjoinPath(X_207,X_70,X_51);
+    X_111:bat[:oid,:int]  := algebra.leftfetchjoinPath(X_207,X_70,X_35);
+    X_112 := algebra.leftfetchjoin(X_108,X_85);
+    (X_113,r1_134,r2_134) := group.subgroup(X_112);
+    (X_116,r1_137,r2_137) := group.subgroup(X_111,X_113);
+    (X_119,r1_140,r2_140) := group.subgroup(X_110,X_116);
+    (X_122,r1_143,r2_143) := group.subgroupdone(X_109,X_119);
+    X_125 := algebra.leftfetchjoin(r1_143,X_109);
+    X_126 := algebra.leftfetchjoin(r1_143,X_110);
+    X_127 := algebra.leftfetchjoin(r1_143,X_111);
+    X_140 := algebra.leftfetchjoin(r1_143,X_112);
+    (X_128,r1_149,r2_149) := group.subgroup(X_127);
+    (X_131,r1_152,r2_152) := group.subgroup(X_126,X_128);
+    (X_134,r1_155,r2_155) := group.subgroupdone(X_125,X_131);
+    X_137 := algebra.leftfetchjoin(r1_155,X_127);
+    X_138 := algebra.leftfetchjoin(r1_155,X_125);
+    X_139 := algebra.leftfetchjoin(r1_155,X_126);
+    X_141:bat[:oid,:wrd]  := aggr.subcount(X_140,X_134,r1_155,true);
+    (X_143,r1_164,r2_164) := algebra.subsort(X_141,true,false);
+    (X_147,r1_168,r2_168) := algebra.subsort(X_139,r1_164,r2_164,false,false);
+    (X_150,r1_171,r2_171) := algebra.subsort(X_138,r1_168,r2_168,false,false);
+    (X_153,r1_174,r2_174) := algebra.subsort(X_137,r1_171,r2_171,false,false);
+    X_156 := algebra.leftfetchjoin(r1_174,X_139);
+    X_159 := algebra.leftfetchjoin(r1_174,X_141);
+    X_158 := algebra.leftfetchjoin(r1_174,X_137);
+    X_157 := algebra.leftfetchjoin(r1_174,X_138);
+    X_160 := sql.resultSet(4,1,X_156);
+    sql.rsColumn(X_160,"sys.part","p_brand","varchar",10,0,X_156);
+    sql.rsColumn(X_160,"sys.part","p_type","varchar",25,0,X_157);
+    sql.rsColumn(X_160,"sys.part","p_size","int",32,0,X_158);
+    sql.rsColumn(X_160,"sys.L4","supplier_cnt","wrd",64,0,X_159);
+    X_181 := io.stdout();
+    sql.exportResult(X_181,X_160);
 end s2_1;
 # querylog.define("explain 
select\n\tp_brand,\n\tp_type,\n\tp_size,\n\tcount(distinct ps_suppkey) as 
supplier_cnt\nfrom\n\tpartsupp,\n\tpart\nwhere\n\tp_partkey = ps_partkey\n\tand 
p_brand <> \\'Brand#45\\'\n\tand p_type not like \\'MEDIUM POLISHED%\\'\n\tand 
p_size in (49, 14, 23, 45, 19, 3, 36, 9)\n\tand ps_suppkey not in 
(\n\t\tselect\n\t\t\ts_suppkey\n\t\tfrom\n\t\t\tsupplier\n\t\twhere\n\t\t\ts_comment
 like \\'%Customer%Complaints%\\'\n\t)\ngroup 
by\n\tp_brand,\n\tp_type,\n\tp_size\norder by\n\tsupplier_cnt 
desc,\n\tp_brand,\n\tp_type,\n\tp_size;","sequential_pipe")
 
diff --git a/sql/benchmarks/tpch/Tests/16-plan.stable.out 
b/sql/benchmarks/tpch/Tests/16-plan.stable.out
--- a/sql/benchmarks/tpch/Tests/16-plan.stable.out
+++ b/sql/benchmarks/tpch/Tests/16-plan.stable.out
@@ -58,25 +58,27 @@ Ready.
 % .plan # table_name
 % rel # name
 % clob # type
-% 335 # length
+% 337 # length
 project (
 | group by (
-| | project (
-| | | antijoin (
-| | | | join (
-| | | | | table(sys.partsupp) [ partsupp.ps_suppkey NOT NULL, 
partsupp.%partsupp_ps_partkey_fkey NOT NULL JOINIDX 
sys.partsupp.partsupp_ps_partkey_fkey ] COUNT ,
-| | | | | select (
-| | | | | | table(sys.part) [ part.p_brand NOT NULL, part.p_type NOT NULL, 
part.p_size NOT NULL, part.%TID% NOT NULL ] COUNT 
-| | | | | ) [ part.p_brand NOT NULL != varchar(10)[char(8) "Brand#45"], 
char[part.p_type NOT NULL] ! FILTER like (char[char(16) "MEDIUM POLISHED%"], 
char ""), part.p_size NOT NULL in (int[tinyint "49"], int[tinyint "14"], 
int[tinyint "23"], int[tinyint "45"], int[tinyint "19"], int[tinyint "3"], 
int[tinyint "36"], int[tinyint "9"]) ]
-| | | | ) [ partsupp.%partsupp_ps_partkey_fkey NOT NULL = part.%TID% NOT NULL 
JOINIDX sys.partsupp.partsupp_ps_partkey_fkey ],
-| | | | project (
-| | | | | select (
-| | | | | | table(sys.supplier) [ supplier.s_suppkey NOT NULL HASHCOL , 
supplier.s_comment NOT NULL ] COUNT 
-| | | | | ) [ char[supplier.s_comment NOT NULL] FILTER like (char[char(21) 
"%Customer%Complaints%"], char "") ]
-| | | | ) [ supplier.s_suppkey NOT NULL HASHCOL  as L3.L2 ]
-| | | ) [ partsupp.ps_suppkey NOT NULL = L3.L2 NOT NULL HASHCOL  ]
-| | ) [ partsupp.ps_suppkey NOT NULL, part.p_brand NOT NULL, part.p_type NOT 
NULL, part.p_size NOT NULL ]
-| ) [ part.p_size NOT NULL, part.p_brand NOT NULL, part.p_type NOT NULL ] [ 
part.p_brand NOT NULL, part.p_type NOT NULL, part.p_size NOT NULL, sys.count 
unique  no nil (partsupp.ps_suppkey NOT NULL) NOT NULL as L4.L4 ]
+| | group by (
+| | | project (
+| | | | antijoin (
+| | | | | join (
+| | | | | | table(sys.partsupp) [ partsupp.ps_suppkey NOT NULL, 
partsupp.%partsupp_ps_partkey_fkey NOT NULL JOINIDX 
sys.partsupp.partsupp_ps_partkey_fkey ] COUNT ,
+| | | | | | select (
+| | | | | | | table(sys.part) [ part.p_brand NOT NULL, part.p_type NOT NULL, 
part.p_size NOT NULL, part.%TID% NOT NULL ] COUNT 
+| | | | | | ) [ part.p_brand NOT NULL != varchar(10)[char(8) "Brand#45"], 
char[part.p_type NOT NULL] ! FILTER like (char[char(16) "MEDIUM POLISHED%"], 
char ""), part.p_size NOT NULL in (int[tinyint "49"], int[tinyint "14"], 
int[tinyint "23"], int[tinyint "45"], int[tinyint "19"], int[tinyint "3"], 
int[tinyint "36"], int[tinyint "9"]) ]
+| | | | | ) [ partsupp.%partsupp_ps_partkey_fkey NOT NULL = part.%TID% NOT 
NULL JOINIDX sys.partsupp.partsupp_ps_partkey_fkey ],
+| | | | | project (
+| | | | | | select (
+| | | | | | | table(sys.supplier) [ supplier.s_suppkey NOT NULL HASHCOL , 
supplier.s_comment NOT NULL ] COUNT 
+| | | | | | ) [ char[supplier.s_comment NOT NULL] FILTER like (char[char(21) 
"%Customer%Complaints%"], char "") ]
+| | | | | ) [ supplier.s_suppkey NOT NULL HASHCOL  as L3.L2 ]
+| | | | ) [ partsupp.ps_suppkey NOT NULL = L3.L2 NOT NULL HASHCOL  ]
+| | | ) [ partsupp.ps_suppkey NOT NULL, part.p_brand NOT NULL, part.p_type NOT 
NULL, part.p_size NOT NULL ]
+| | ) [ partsupp.ps_suppkey NOT NULL as L5.L5, part.p_size NOT NULL, 
part.p_brand NOT NULL, part.p_type NOT NULL ] [ part.p_size NOT NULL, 
part.p_brand NOT NULL, part.p_type NOT NULL, L5.L5 NOT NULL ]
+| ) [ part.p_size NOT NULL, part.p_brand NOT NULL, part.p_type NOT NULL ] [ 
part.p_brand NOT NULL, part.p_type NOT NULL, part.p_size NOT NULL, sys.count no 
nil (L5.L5 NOT NULL) NOT NULL as L4.L4 ]
 ) [ part.p_brand NOT NULL, part.p_type NOT NULL, part.p_size NOT NULL, L4 NOT 
NULL as L4.supplier_cnt ] [ L4.supplier_cnt NOT NULL, part.p_brand ASC NOT 
NULL, part.p_type ASC NOT NULL, part.p_size ASC NOT NULL ]
 
 # 22:46:33 >  
diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c
--- a/sql/server/rel_optimizer.c
+++ b/sql/server/rel_optimizer.c
@@ -3943,6 +3943,50 @@ rel_reduce_groupby_exps(int *changes, mv
        return rel;
 }
 
+/* Rewrite group by expressions with distinct 
+ *
+ * ie select a, count(distinct b) from c where ... groupby a;
+ * No other aggregations should be present
+ */
+
+static sql_rel *
+rel_groupby_distinct(int *changes, mvc *sql, sql_rel *rel) 
+{
+       if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) {
+               node *n;
+               int nr = 0;
+               list *gbe, *arg, *exps;
+               sql_exp *distinct = NULL, *darg;
+               sql_rel *l = NULL;
+
+               for (n=rel->exps->h; n && nr <= 2; n = n->next) {
+                       sql_exp *e = n->data;
+                       if (need_distinct(e)) {
+                               distinct = n->data;
+                               nr++;
+                       }
+               }
+               if (nr != 1 || list_length(rel->r) + nr != 
list_length(rel->exps)) 
+                       return rel;
+               arg = distinct->l;
+               if (distinct->type != e_aggr || list_length(arg) != 1)
+                       return rel;
+               darg = arg->h->data;
+               exp_label(sql->sa, darg, ++sql->label);
+               gbe = list_dup(rel->r, (fdup)NULL);
+               exps = list_dup(rel->r, (fdup)NULL);
+               list_append(gbe, exp_copy(sql->sa, darg));
+               darg = exp_column(sql->sa, exp_find_rel_name(darg), 
exp_name(darg), exp_subtype(darg), darg->card, has_nil(darg), is_intern(darg));
+               list_append(exps, exp_copy(sql->sa, darg));
+               arg->h->data = darg;
+               l = rel->l = rel_groupby(sql, rel->l, gbe);
+               l->exps = exps;
+               set_nodistinct(distinct);
+               (*changes)++;
+       }
+       return rel;
+}
+
 static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e);
 
 static void
@@ -6401,6 +6445,7 @@ static sql_rel *
                rel = rewrite_topdown(sql, rel, &rel_push_aggr_down, &changes);
                rel = rewrite(sql, rel, &rel_groupby_order, &changes); 
                rel = rewrite(sql, rel, &rel_reduce_groupby_exps, &changes); 
+               rel = rewrite(sql, rel, &rel_groupby_distinct, &changes); 
        }
 
        if (gp.cnt[op_join] || gp.cnt[op_left] || 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to