Changeset: 78eca92f6d68 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=78eca92f6d68 Modified Files: sql/benchmarks/tpch/Tests/16-explain.stable.out sql/benchmarks/tpch/Tests/16-plan.stable.out sql/server/rel_optimizer.c Branch: Jan2014 Log Message:
rewrite group by distinct diffs (185 lines): diff --git a/sql/benchmarks/tpch/Tests/16-explain.stable.out b/sql/benchmarks/tpch/Tests/16-explain.stable.out --- a/sql/benchmarks/tpch/Tests/16-explain.stable.out +++ b/sql/benchmarks/tpch/Tests/16-explain.stable.out @@ -123,36 +123,41 @@ function user.s2_1{autoCommit=true}(A0:s X_105:bat[:oid,:int] := algebra.leftfetchjoinPath(X_97,X_87,X_104); (X_106,r1_121) := algebra.join(X_85,X_105); X_108 := algebra.tdiff(X_86,X_106); - X_196 := algebra.leftfetchjoin(X_108,r1_83); - X_109:bat[:oid,:str] := algebra.leftfetchjoinPath(X_196,X_70,X_42); - X_110:bat[:oid,:str] := algebra.leftfetchjoinPath(X_196,X_70,X_51); - X_111:bat[:oid,:int] := algebra.leftfetchjoinPath(X_196,X_70,X_35); - (X_112,r1_133,r2_133) := group.subgroup(X_111); - (X_115,r1_136,r2_136) := group.subgroup(X_110,X_112); - (X_118,r1_139,r2_139) := group.subgroupdone(X_109,X_115); - X_121 := algebra.leftfetchjoin(r1_139,X_111); - X_122 := algebra.leftfetchjoin(r1_139,X_109); - X_123 := algebra.leftfetchjoin(r1_139,X_110); - X_124 := algebra.leftfetchjoin(X_108,X_85); - (X_125,r1_146,r2_146) := group.subgroupdone(X_124,X_118); - X_128 := algebra.leftfetchjoin(r1_146,X_124); - X_129 := algebra.leftfetchjoin(r1_146,X_118); - X_130:bat[:oid,:wrd] := aggr.subcount(X_128,X_129,r1_139,true); - (X_132,r1_153,r2_153) := algebra.subsort(X_130,true,false); - (X_136,r1_157,r2_157) := algebra.subsort(X_123,r1_153,r2_153,false,false); - (X_139,r1_160,r2_160) := algebra.subsort(X_122,r1_157,r2_157,false,false); - (X_142,r1_163,r2_163) := algebra.subsort(X_121,r1_160,r2_160,false,false); - X_145 := algebra.leftfetchjoin(r1_163,X_123); - X_148 := algebra.leftfetchjoin(r1_163,X_130); - X_147 := algebra.leftfetchjoin(r1_163,X_121); - X_146 := algebra.leftfetchjoin(r1_163,X_122); - X_149 := sql.resultSet(4,1,X_145); - sql.rsColumn(X_149,"sys.part","p_brand","varchar",10,0,X_145); - sql.rsColumn(X_149,"sys.part","p_type","varchar",25,0,X_146); - sql.rsColumn(X_149,"sys.part","p_size","int",32,0,X_147); - sql.rsColumn(X_149,"sys.L4","supplier_cnt","wrd",64,0,X_148); - X_170 := io.stdout(); - sql.exportResult(X_170,X_149); + X_207 := algebra.leftfetchjoin(X_108,r1_83); + X_109:bat[:oid,:str] := algebra.leftfetchjoinPath(X_207,X_70,X_42); + X_110:bat[:oid,:str] := algebra.leftfetchjoinPath(X_207,X_70,X_51); + X_111:bat[:oid,:int] := algebra.leftfetchjoinPath(X_207,X_70,X_35); + X_112 := algebra.leftfetchjoin(X_108,X_85); + (X_113,r1_134,r2_134) := group.subgroup(X_112); + (X_116,r1_137,r2_137) := group.subgroup(X_111,X_113); + (X_119,r1_140,r2_140) := group.subgroup(X_110,X_116); + (X_122,r1_143,r2_143) := group.subgroupdone(X_109,X_119); + X_125 := algebra.leftfetchjoin(r1_143,X_109); + X_126 := algebra.leftfetchjoin(r1_143,X_110); + X_127 := algebra.leftfetchjoin(r1_143,X_111); + X_140 := algebra.leftfetchjoin(r1_143,X_112); + (X_128,r1_149,r2_149) := group.subgroup(X_127); + (X_131,r1_152,r2_152) := group.subgroup(X_126,X_128); + (X_134,r1_155,r2_155) := group.subgroupdone(X_125,X_131); + X_137 := algebra.leftfetchjoin(r1_155,X_127); + X_138 := algebra.leftfetchjoin(r1_155,X_125); + X_139 := algebra.leftfetchjoin(r1_155,X_126); + X_141:bat[:oid,:wrd] := aggr.subcount(X_140,X_134,r1_155,true); + (X_143,r1_164,r2_164) := algebra.subsort(X_141,true,false); + (X_147,r1_168,r2_168) := algebra.subsort(X_139,r1_164,r2_164,false,false); + (X_150,r1_171,r2_171) := algebra.subsort(X_138,r1_168,r2_168,false,false); + (X_153,r1_174,r2_174) := algebra.subsort(X_137,r1_171,r2_171,false,false); + X_156 := algebra.leftfetchjoin(r1_174,X_139); + X_159 := algebra.leftfetchjoin(r1_174,X_141); + X_158 := algebra.leftfetchjoin(r1_174,X_137); + X_157 := algebra.leftfetchjoin(r1_174,X_138); + X_160 := sql.resultSet(4,1,X_156); + sql.rsColumn(X_160,"sys.part","p_brand","varchar",10,0,X_156); + sql.rsColumn(X_160,"sys.part","p_type","varchar",25,0,X_157); + sql.rsColumn(X_160,"sys.part","p_size","int",32,0,X_158); + sql.rsColumn(X_160,"sys.L4","supplier_cnt","wrd",64,0,X_159); + X_181 := io.stdout(); + sql.exportResult(X_181,X_160); end s2_1; # querylog.define("explain select\n\tp_brand,\n\tp_type,\n\tp_size,\n\tcount(distinct ps_suppkey) as supplier_cnt\nfrom\n\tpartsupp,\n\tpart\nwhere\n\tp_partkey = ps_partkey\n\tand p_brand <> \\'Brand#45\\'\n\tand p_type not like \\'MEDIUM POLISHED%\\'\n\tand p_size in (49, 14, 23, 45, 19, 3, 36, 9)\n\tand ps_suppkey not in (\n\t\tselect\n\t\t\ts_suppkey\n\t\tfrom\n\t\t\tsupplier\n\t\twhere\n\t\t\ts_comment like \\'%Customer%Complaints%\\'\n\t)\ngroup by\n\tp_brand,\n\tp_type,\n\tp_size\norder by\n\tsupplier_cnt desc,\n\tp_brand,\n\tp_type,\n\tp_size;","sequential_pipe") diff --git a/sql/benchmarks/tpch/Tests/16-plan.stable.out b/sql/benchmarks/tpch/Tests/16-plan.stable.out --- a/sql/benchmarks/tpch/Tests/16-plan.stable.out +++ b/sql/benchmarks/tpch/Tests/16-plan.stable.out @@ -58,25 +58,27 @@ Ready. % .plan # table_name % rel # name % clob # type -% 335 # length +% 337 # length project ( | group by ( -| | project ( -| | | antijoin ( -| | | | join ( -| | | | | table(sys.partsupp) [ partsupp.ps_suppkey NOT NULL, partsupp.%partsupp_ps_partkey_fkey NOT NULL JOINIDX sys.partsupp.partsupp_ps_partkey_fkey ] COUNT , -| | | | | select ( -| | | | | | table(sys.part) [ part.p_brand NOT NULL, part.p_type NOT NULL, part.p_size NOT NULL, part.%TID% NOT NULL ] COUNT -| | | | | ) [ part.p_brand NOT NULL != varchar(10)[char(8) "Brand#45"], char[part.p_type NOT NULL] ! FILTER like (char[char(16) "MEDIUM POLISHED%"], char ""), part.p_size NOT NULL in (int[tinyint "49"], int[tinyint "14"], int[tinyint "23"], int[tinyint "45"], int[tinyint "19"], int[tinyint "3"], int[tinyint "36"], int[tinyint "9"]) ] -| | | | ) [ partsupp.%partsupp_ps_partkey_fkey NOT NULL = part.%TID% NOT NULL JOINIDX sys.partsupp.partsupp_ps_partkey_fkey ], -| | | | project ( -| | | | | select ( -| | | | | | table(sys.supplier) [ supplier.s_suppkey NOT NULL HASHCOL , supplier.s_comment NOT NULL ] COUNT -| | | | | ) [ char[supplier.s_comment NOT NULL] FILTER like (char[char(21) "%Customer%Complaints%"], char "") ] -| | | | ) [ supplier.s_suppkey NOT NULL HASHCOL as L3.L2 ] -| | | ) [ partsupp.ps_suppkey NOT NULL = L3.L2 NOT NULL HASHCOL ] -| | ) [ partsupp.ps_suppkey NOT NULL, part.p_brand NOT NULL, part.p_type NOT NULL, part.p_size NOT NULL ] -| ) [ part.p_size NOT NULL, part.p_brand NOT NULL, part.p_type NOT NULL ] [ part.p_brand NOT NULL, part.p_type NOT NULL, part.p_size NOT NULL, sys.count unique no nil (partsupp.ps_suppkey NOT NULL) NOT NULL as L4.L4 ] +| | group by ( +| | | project ( +| | | | antijoin ( +| | | | | join ( +| | | | | | table(sys.partsupp) [ partsupp.ps_suppkey NOT NULL, partsupp.%partsupp_ps_partkey_fkey NOT NULL JOINIDX sys.partsupp.partsupp_ps_partkey_fkey ] COUNT , +| | | | | | select ( +| | | | | | | table(sys.part) [ part.p_brand NOT NULL, part.p_type NOT NULL, part.p_size NOT NULL, part.%TID% NOT NULL ] COUNT +| | | | | | ) [ part.p_brand NOT NULL != varchar(10)[char(8) "Brand#45"], char[part.p_type NOT NULL] ! FILTER like (char[char(16) "MEDIUM POLISHED%"], char ""), part.p_size NOT NULL in (int[tinyint "49"], int[tinyint "14"], int[tinyint "23"], int[tinyint "45"], int[tinyint "19"], int[tinyint "3"], int[tinyint "36"], int[tinyint "9"]) ] +| | | | | ) [ partsupp.%partsupp_ps_partkey_fkey NOT NULL = part.%TID% NOT NULL JOINIDX sys.partsupp.partsupp_ps_partkey_fkey ], +| | | | | project ( +| | | | | | select ( +| | | | | | | table(sys.supplier) [ supplier.s_suppkey NOT NULL HASHCOL , supplier.s_comment NOT NULL ] COUNT +| | | | | | ) [ char[supplier.s_comment NOT NULL] FILTER like (char[char(21) "%Customer%Complaints%"], char "") ] +| | | | | ) [ supplier.s_suppkey NOT NULL HASHCOL as L3.L2 ] +| | | | ) [ partsupp.ps_suppkey NOT NULL = L3.L2 NOT NULL HASHCOL ] +| | | ) [ partsupp.ps_suppkey NOT NULL, part.p_brand NOT NULL, part.p_type NOT NULL, part.p_size NOT NULL ] +| | ) [ partsupp.ps_suppkey NOT NULL as L5.L5, part.p_size NOT NULL, part.p_brand NOT NULL, part.p_type NOT NULL ] [ part.p_size NOT NULL, part.p_brand NOT NULL, part.p_type NOT NULL, L5.L5 NOT NULL ] +| ) [ part.p_size NOT NULL, part.p_brand NOT NULL, part.p_type NOT NULL ] [ part.p_brand NOT NULL, part.p_type NOT NULL, part.p_size NOT NULL, sys.count no nil (L5.L5 NOT NULL) NOT NULL as L4.L4 ] ) [ part.p_brand NOT NULL, part.p_type NOT NULL, part.p_size NOT NULL, L4 NOT NULL as L4.supplier_cnt ] [ L4.supplier_cnt NOT NULL, part.p_brand ASC NOT NULL, part.p_type ASC NOT NULL, part.p_size ASC NOT NULL ] # 22:46:33 > diff --git a/sql/server/rel_optimizer.c b/sql/server/rel_optimizer.c --- a/sql/server/rel_optimizer.c +++ b/sql/server/rel_optimizer.c @@ -3943,6 +3943,50 @@ rel_reduce_groupby_exps(int *changes, mv return rel; } +/* Rewrite group by expressions with distinct + * + * ie select a, count(distinct b) from c where ... groupby a; + * No other aggregations should be present + */ + +static sql_rel * +rel_groupby_distinct(int *changes, mvc *sql, sql_rel *rel) +{ + if (is_groupby(rel->op) && rel->r && !rel_is_ref(rel)) { + node *n; + int nr = 0; + list *gbe, *arg, *exps; + sql_exp *distinct = NULL, *darg; + sql_rel *l = NULL; + + for (n=rel->exps->h; n && nr <= 2; n = n->next) { + sql_exp *e = n->data; + if (need_distinct(e)) { + distinct = n->data; + nr++; + } + } + if (nr != 1 || list_length(rel->r) + nr != list_length(rel->exps)) + return rel; + arg = distinct->l; + if (distinct->type != e_aggr || list_length(arg) != 1) + return rel; + darg = arg->h->data; + exp_label(sql->sa, darg, ++sql->label); + gbe = list_dup(rel->r, (fdup)NULL); + exps = list_dup(rel->r, (fdup)NULL); + list_append(gbe, exp_copy(sql->sa, darg)); + darg = exp_column(sql->sa, exp_find_rel_name(darg), exp_name(darg), exp_subtype(darg), darg->card, has_nil(darg), is_intern(darg)); + list_append(exps, exp_copy(sql->sa, darg)); + arg->h->data = darg; + l = rel->l = rel_groupby(sql, rel->l, gbe); + l->exps = exps; + set_nodistinct(distinct); + (*changes)++; + } + return rel; +} + static sql_exp *split_aggr_and_project(mvc *sql, list *aexps, sql_exp *e); static void @@ -6401,6 +6445,7 @@ static sql_rel * rel = rewrite_topdown(sql, rel, &rel_push_aggr_down, &changes); rel = rewrite(sql, rel, &rel_groupby_order, &changes); rel = rewrite(sql, rel, &rel_reduce_groupby_exps, &changes); + rel = rewrite(sql, rel, &rel_groupby_distinct, &changes); } if (gp.cnt[op_join] || gp.cnt[op_left] || _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list