Changeset: d53433b54a92 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d53433b54a92
Modified Files:
        sql/backends/monet5/rel_weld.c
Branch: rel-weld
Log Message:

maybe a faster join


diffs (truncated from 382 to 300 lines):

diff --git a/sql/backends/monet5/rel_weld.c b/sql/backends/monet5/rel_weld.c
--- a/sql/backends/monet5/rel_weld.c
+++ b/sql/backends/monet5/rel_weld.c
@@ -1047,11 +1047,11 @@ cleanup:
 }
 
 static void
-join_produce(backend *be, sql_rel *rel, weld_state *wstate)
+semi_anti_produce(backend *be, sql_rel *rel, weld_state *wstate)
 {
-       char new_builder[STR_BUF_SIZE], probe_key[STR_BUF_SIZE], 
nulls[STR_BUF_SIZE], struct_mbr[64];
+       char new_builder[STR_BUF_SIZE], probe_key[STR_BUF_SIZE];
        str col_name, old_builder;
-       int len, nulls_len, i, count, is_filter = 0, result_var, old_num_loops, 
old_num_parens;
+       int len, i, is_filter = 0, result_var, old_num_loops, old_num_parens;
        node *en;
        sql_exp *exp;
        sql_rel *right = rel->r;
@@ -1112,11 +1112,7 @@ join_produce(backend *be, sql_rel *rel, 
        }
 
        len = 0;
-       if (rel->op == op_semi || rel->op == op_anti) {
-               len += sprintf(new_builder + len, "dictmerger[");
-       } else {
-               len += sprintf(new_builder + len, "groupmerger[");
-       }
+       len += sprintf(new_builder + len, "dictmerger[");
        if (list_length(rel->exps) > 1) {
                len += sprintf(new_builder + len, "{"); /* key is a struct */
        }
@@ -1148,37 +1144,7 @@ join_produce(backend *be, sql_rel *rel, 
        if (list_length(rel->exps) > 1) {
                len += sprintf(new_builder + len, "}"); /* key is a struct */
        }
-       len += sprintf(new_builder + len, ", ");
-       if (rel->op == op_semi || rel->op == op_anti) {
-               /* We only need a dictionary to lookup the key */
-               len += sprintf(new_builder + len, "i64, +");
-       } else {
-               nulls_len = 0;
-               if (list_length(right->exps) > 1) {
-                       len += sprintf(new_builder + len, "{"); /* value is a 
struct */
-                       nulls_len += sprintf(nulls + nulls_len, "{");
-               }
-               for (en = right->exps->h; en; en = en->next) {
-                       exp = en->data;
-                       int type = exp_subtype(exp)->type->localtype;
-                       if (type == TYPE_str) {
-                               len += sprintf(new_builder + len, "i64");
-                               nulls_len += sprintf(nulls + nulls_len, 
"i64nil");
-                       } else {
-                               len += sprintf(new_builder + len, "%s", 
getWeldType(type));
-                               nulls_len += sprintf(nulls + nulls_len, 
"%snil", getWeldType(type));
-                       }
-                       if (en->next != NULL) {
-                               len += sprintf(new_builder + len, ", ");
-                               nulls_len += sprintf(nulls + nulls_len, ", ");
-                       }
-               }
-               if (list_length(right->exps) > 1) {
-                       len += sprintf(new_builder + len, "}"); /* value is a 
struct */
-                       nulls_len += sprintf(nulls + nulls_len, "}");
-               }
-       }
-       len += sprintf(new_builder + len, "]");
+       len += sprintf(new_builder + len, ", i64, +]");
 
        wstate->builder = new_builder;
        right_produce(be, rel->r, wstate);
@@ -1198,30 +1164,7 @@ join_produce(backend *be, sql_rel *rel, 
        if (list_length(right_cmp_cols) > 1) {
                wprintf(wstate, "}"); /* key is a struct */
        }
-       wprintf(wstate, ", ");
-       if (rel->op == op_semi || rel->op == op_anti) {
-               wprintf(wstate, "1L");
-       } else {
-               if (list_length(right->exps) > 1) {
-                       wprintf(wstate, "{"); /* value is a struct */
-               }
-               /* Build the value */
-               for (en = right->exps->h, count = 0; en; en = en->next, 
count++) {
-                       exp = en->data;
-                       wprintf(wstate, "%s", (str)list_fetch(right_cols, 
count));
-                       if (exp_subtype(exp)->type->localtype == TYPE_str) {
-                               wprintf(wstate, "_stridx");
-                       }
-                       if (en->next != NULL) {
-                               wprintf(wstate, ", ");
-                       }
-               }
-               if (list_length(right->exps) > 1) {
-                       wprintf(wstate, "}"); /* value is a struct */
-               }
-       }
-
-       wprintf(wstate, "})");
+       wprintf(wstate, ", 1L})");
        for (i = 0; i < wstate->num_parens; i++) {
                wprintf(wstate, ")");
        }
@@ -1251,40 +1194,230 @@ join_produce(backend *be, sql_rel *rel, 
                len += sprintf(probe_key + len, "}");
        }
 
+       wstate->num_parens++;
+       /* Reverse the condition */
        if (rel->op == op_semi) {
-               /* Reverse the condition */
-               wstate->num_parens++;
                wprintf(wstate, "if(keyexists(v%d, %s) == false, b%d, ", 
result_var, probe_key, wstate->num_loops);
        } else if (rel->op == op_anti) {
-               /* Reverse the condition */
-               wstate->num_parens++;
                wprintf(wstate, "if(keyexists(v%d, %s) == true, b%d, ", 
result_var, probe_key, wstate->num_loops);
-       } else {
-               if (rel->op == op_join) {
-                       wprintf(wstate, "let match = if(keyexists(v%d, %s), 
lookup(v%d, %s), []);", result_var,
-                                       probe_key, result_var, probe_key);
-               } else if (rel->op == op_left) {
-                       wprintf(wstate, "let match = if(keyexists(v%d, %s), 
lookup(v%d, %s), [%s]);", result_var,
-                                       probe_key, result_var, probe_key, 
nulls);
+       }
+cleanup:
+       list_destroy(right_cols);
+       list_destroy(right_cmp_cols);
+       list_destroy(left_cmp_cols);
+}
+
+static void
+join_produce(backend *be, sql_rel *rel, weld_state *wstate)
+{
+       char new_builder[STR_BUF_SIZE], probe_key[STR_BUF_SIZE], 
nulls[STR_BUF_SIZE], value[64];
+       str col_name, old_builder;
+       int len, nulls_len, i, count, is_filter = 0, result_var, old_num_loops, 
old_num_parens;
+       node *en;
+       sql_exp *exp;
+       sql_rel *right = rel->r;
+       list *right_cols = sa_list(wstate->sa);
+       list *right_cmp_cols = sa_list(wstate->sa);
+       list *left_cmp_cols = sa_list(wstate->sa);
+       produce_func left_produce, right_produce;
+
+       if (rel->exps == NULL) {
+               /* Cross product */
+               wstate->error = 1;
+               goto cleanup;
+       }
+       for (en = rel->exps->h; en; en = en->next) {
+               exp = en->data;
+               if (get_cmp(exp) == cmp_notequal) {
+                       /* We don't support this yet */
+                       wstate->error = 1;
+                       goto cleanup;
+               } else if (get_cmp(exp) != cmp_equal) {
+                       is_filter = 1;
+               }
+       }
+
+       right_produce = getproduce_func(rel->r);
+       left_produce = getproduce_func(rel->l);
+       if (right_produce == NULL || left_produce == NULL) {
+               wstate->error = 1;
+               goto cleanup;
+       }
+       if (is_filter) {
+               right_produce(be, rel->r, wstate);
+               select_produce(be, rel, wstate);
+               goto cleanup;
+       }
+
+       /* === Produce === */
+       old_num_parens = wstate->num_parens;
+       old_num_loops = wstate->num_loops;
+       old_builder = wstate->builder;
+
+       /* Create a new builder */
+       wstate->num_parens = wstate->num_loops = 0;
+       result_var = wstate->next_var++;
+       wprintf(wstate, "let v%dvecs = (", result_var);
+       wstate->num_parens++;
+
+       /* Find the operator that produces the columns */
+       while (right != NULL && right->op != op_project && right->op != 
op_basetable) {
+               right = right->l;
+       }
+       if (right == NULL) {
+               wstate->error = 1;
+               goto cleanup;
+       }
+       for (en = right->exps->h; en; en = en->next) {
+               list_append(right_cols, get_col_name(wstate->sa, en->data, 
ANY));
+       }
+
+       /* {appender[{..}], appender[..], appender[..] ... } */
+       len = 0;
+       len += sprintf(new_builder + len, "{appender[");
+       if (list_length(rel->exps) > 1) {
+               len += sprintf(new_builder + len, "{"); /* key is a struct */
+       }
+       for (en = rel->exps->h; en; en = en->next) {
+               /* left cmp */
+               exp = ((sql_exp*)en->data)->l;
+               col_name = get_col_name(wstate->sa, exp, REL);
+               if (list_find(right_cols, col_name, (fcmp)strcmp)) {
+                       list_append(right_cmp_cols, col_name);
+               } else {
+                       list_append(left_cmp_cols, col_name);
+               }
+               /* right cmp */
+               exp = ((sql_exp*)en->data)->r;
+               col_name = get_col_name(wstate->sa, exp, REL);
+               if (list_find(right_cols, col_name, (fcmp)strcmp)) {
+                       list_append(right_cmp_cols, col_name);
+               } else {
+                       list_append(left_cmp_cols, col_name);
+               }
+
+               /* both have the same type */
+               int type = exp_subtype(exp)->type->localtype;
+               len += sprintf(new_builder + len, "%s", getWeldType(type));
+               if (en->next != NULL) {
+                       len += sprintf(new_builder + len, ", ");
                }
-               wstate->num_parens++;
-               wstate->num_loops++;
-               wprintf(wstate, "for(match, b%d, |b%d, i_%d, n%d|", 
wstate->num_loops - 1,
-                               wstate->num_loops, wstate->num_loops, 
wstate->num_loops);
-               for (en = right->exps->h, count = 0; en; en = en->next, 
count++) {
-                       len = sprintf(struct_mbr, "n%d", wstate->num_loops);
-                       if (list_length(right->exps) > 1) {
-                               len += sprintf(struct_mbr + len, ".$%d", count);
-                       }
-                       exp = en->data;
-                       col_name = list_fetch(right_cols, count);
-                       if (exp_subtype(exp)->type->localtype == TYPE_str) {
-                               wprintf(wstate, "let %s = strslice(%s_strcol, 
%s);", col_name, col_name,
-                                               struct_mbr);
-                               wprintf(wstate, "let %s_stridx = %s;", 
col_name, struct_mbr);
-                       } else {
-                               wprintf(wstate, "let %s = %s;", col_name, 
struct_mbr);
-                       }
+       }
+       if (list_length(rel->exps) > 1) {
+               len += sprintf(new_builder + len, "}"); /* key is a struct */
+       }
+       len += sprintf(new_builder + len, "], ");
+       nulls_len = 0;
+       for (en = right->exps->h, count = 1; en; en = en->next, count++) {
+               exp = en->data;
+               int type = exp_subtype(exp)->type->localtype;
+               if (type == TYPE_str) {
+                       type = TYPE_lng;
+               }
+               len += sprintf(new_builder + len, "appender[%s]", 
getWeldType(type));
+               nulls_len += sprintf(nulls + nulls_len, "merge(v%dvecs.$%d, 
%snil)", result_var, count, getWeldType(type));
+               if (en->next != NULL) {
+                       len += sprintf(new_builder + len, ", ");
+                       nulls_len += sprintf(nulls + nulls_len, ", ");
+               }
+       }
+       len += sprintf(new_builder + len, "}"); /* complete the builder */
+
+       wstate->builder = new_builder;
+       right_produce(be, rel->r, wstate);
+
+       /* === Consume === */
+       wprintf(wstate, "{merge(b%d.$0, ", wstate->num_loops); /* {key, value} 
*/
+       /* Build the key */
+       if (list_length(right_cmp_cols) > 1) {
+               wprintf(wstate, "{"); /* key is a struct */
+       }
+       for (en = right_cmp_cols->h; en; en = en->next) {
+               wprintf(wstate, "%s", (str)en->data);
+               if (en->next != NULL) {
+                       wprintf(wstate, ", ");
+               }
+       }
+       if (list_length(right_cmp_cols) > 1) {
+               wprintf(wstate, "}"); /* key is a struct */
+       }
+       wprintf(wstate, "), ");
+       /* Append the values */
+       for (en = right->exps->h, count = 0; en; en = en->next, count++) {
+               exp = en->data;
+               wprintf(wstate, "merge(b%d.$%d, %s", wstate->num_loops, count + 
1, (str)list_fetch(right_cols, count));
+               if (exp_subtype(exp)->type->localtype == TYPE_str) {
+                       wprintf(wstate, "_stridx");
+               }
+               wprintf(wstate, ")");
+               if (en->next != NULL) {
+                       wprintf(wstate, ", ");
+               }
+       }
+       wprintf(wstate, "}");
+       for (i = 0; i < wstate->num_parens; i++) {
+               wprintf(wstate, ")");
+       }
+       wprintf(wstate, ";");
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to