Changeset: 735c252d2dff for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=735c252d2dff
Modified Files:
        monetdb5/modules/mal/mal_weld.c
        monetdb5/modules/mal/mal_weld.h
        monetdb5/modules/mal/mal_weld.mal
        monetdb5/modules/mal/mal_weld.mal.sh
        monetdb5/optimizer/opt_prelude.c
        monetdb5/optimizer/opt_prelude.h
        monetdb5/optimizer/opt_weld.c
Branch: mal-weld
Log Message:

weld impl for group.group and group.groupdone


diffs (250 lines):

diff --git a/monetdb5/modules/mal/mal_weld.c b/monetdb5/modules/mal/mal_weld.c
--- a/monetdb5/modules/mal/mal_weld.c
+++ b/monetdb5/modules/mal/mal_weld.c
@@ -117,12 +117,14 @@ static void dumpWeldProgram(weldState *w
 }
 
 str
-WeldInitState(ptr *retval)
+WeldInitState(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
+       (void)cntxt;
        weldState *wstate = malloc(sizeof(weldState));
        wstate->programMaxLen = 1;
        wstate->program = calloc(wstate->programMaxLen, sizeof(char));
-       *retval = wstate;
+       wstate->groupDeps = calloc(mb->vtop, sizeof(InstrPtr));
+       *getArgReference_ptr(stk, pci, 0) = wstate;;
        return MAL_SUCCEED;
 }
 
@@ -178,6 +180,7 @@ WeldRun(Client cntxt, MalBlkPtr mb, MalS
        weld_module_t m = weld_module_compile(wstate->program, conf, e);
        weld_conf_free(conf);
        free(wstate->program);
+       free(wstate->groupDeps);
        free(wstate);
        if (weld_error_code(e)) {
                throw(MAL, "weld.run", PROGRAM_GENERAL ": %s", 
weld_error_message(e));
@@ -531,6 +534,90 @@ WeldBatcalcMULsignal(Client cntxt, MalBl
        return WeldBatcalcBinary(mb, stk, pci, "*", "weld.batcalcmul");
 }
 
+/* Ignore the existing groups and instead use all the columns up to this point to
+ * generate the new group ids. Weld will remove the unnecessary computations. e.g.:
+ * g1, e1, h1 = group.group(col1)  -> for(zip(col1), dictmerger[ty1, i64, min]...
+ * g2, e2, h2 = group.group(col2, g1) -> for(zip(col2, col1), dictmerger[{ty1, ty2}, i64, min]...
+ */
+str
+WeldGroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
+{
+       (void)cntxt;
+       int groups = getArg(pci, 0);  /* bat[:oid] */
+       int extents = getArg(pci, 1); /* bat[:oid] */
+       int histo = getArg(pci, 2);   /* bat[:lng] */
+       weldState *wstate;
+       if (pci->argc == 6) {
+               wstate = *getArgReference_ptr(stk, pci, 5); /* has value */
+       } else {
+               wstate = *getArgReference_ptr(stk, pci, 4); /* has value */
+       }
+
+       /* Build zip(col1, col2, ...) */
+       wstate->groupDeps[groups] = pci;
+       InstrPtr dep = pci;
+       char zipStmt[STR_SIZE_INC] = {'\0'};
+       char dictTypeStmt[STR_SIZE_INC] = {'\0'};
+       int count = 0;
+       while (dep != NULL) {
+               ++count;
+               int col = getArg(dep, 3);
+               int colType = getBatType(getArgType(mb, dep, 3));
+               sprintf(zipStmt + strlen(zipStmt), "v%d,", col);
+               sprintf(dictTypeStmt + strlen(dictTypeStmt), " %s,", 
getWeldType(colType));
+               if (dep->argc == 6) {
+                       int oldGrps = getArg(dep, 4);
+                       dep = wstate->groupDeps[oldGrps];
+               } else {
+                       dep = NULL;
+               }
+       }
+       /* Replace the last comma */
+       zipStmt[strlen(zipStmt) - 1] = '\0';
+       if (count == 1) {
+               dictTypeStmt[strlen(dictTypeStmt) - 1] = '\0';
+       } else {
+               dictTypeStmt[0] = '{';
+               dictTypeStmt[strlen(dictTypeStmt) - 1] = '}';
+       }
+
+       char weldStmt[STR_SIZE_INC * 2];
+       sprintf(weldStmt, "\
+       let groupHash = result( \
+               for(zip(%s), dictmerger[%s, i64, min], |b, i, n| \
+                       merge(b, {n, i}) \
+               ) \
+       ); \
+       let groupHashVec = tovec(groupHash); \
+       let groupIdsDict = result( \
+               for(groupHashVec, dictmerger[%s, i64, min], |b, i, n| \
+                       merge(b, {n.$0, i}) \
+               ) \
+       ); \
+       let empty = result( \
+               for(rangeiter(0L, len(groupHashVec), 1L), appender[i64], |b, i, 
n| \
+                       merge(b, 0L) \
+               ) \
+       ); \
+       let idsAndCounts = for(zip(%s), {appender[i64], vecmerger[i64, 
+](empty)}, |b, i, n| \
+               let groupId = lookup(groupIdsDict, n); \
+               {merge(b.$0, groupId), merge(b.$1, {groupId, 1L})} \
+       ); \
+       let v%d = result(idsAndCounts.$0); \
+       let v%dhseqbase = 0; \
+       let v%d = result(idsAndCounts.$1); \
+       let v%dhseqbase = 0; \
+       let v%d = result( \
+               for(groupHashVec, vecmerger[i64, +](empty), |b, i, n| \
+                       merge(b, {i, lookup(groupHash, n.$0)}) \
+               ) \
+       ); \
+       let v%dhseqbase = 0;",
+       zipStmt, dictTypeStmt, dictTypeStmt, zipStmt, groups, groups, histo, 
histo, extents, extents);
+       appendWeldStmt(wstate, weldStmt);
+       return MAL_SUCCEED;
+}
+
 str
 WeldLanguagePass(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
diff --git a/monetdb5/modules/mal/mal_weld.h b/monetdb5/modules/mal/mal_weld.h
--- a/monetdb5/modules/mal/mal_weld.h
+++ b/monetdb5/modules/mal/mal_weld.h
@@ -13,10 +13,11 @@
 
 typedef struct {
        char *program;
+       InstrPtr *groupDeps;
        size_t programMaxLen;
 } weldState;
 
-mal_export str WeldInitState(ptr *retval);
+mal_export str WeldInitState(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 mal_export str WeldRun(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 mal_export str WeldAggrSum(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 mal_export str WeldAlgebraProjection(Client cntxt, MalBlkPtr mb, MalStkPtr 
stk, InstrPtr pci);
@@ -27,6 +28,7 @@ mal_export str WeldAlgebraThetaselect2(C
 mal_export str WeldBatcalcADDsignal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 mal_export str WeldBatcalcSUBsignal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 mal_export str WeldBatcalcMULsignal(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
+mal_export str WeldGroup(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr 
pci);
 mal_export str WeldLanguagePass(Client cntxt, MalBlkPtr mb, MalStkPtr stk, 
InstrPtr pci);
 
 #endif
diff --git a/monetdb5/modules/mal/mal_weld.mal b/monetdb5/modules/mal/mal_weld.mal
--- a/monetdb5/modules/mal/mal_weld.mal
+++ b/monetdb5/modules/mal/mal_weld.mal
@@ -9,7 +9,7 @@
 module weld;
 
 
-command initstate():ptr
+pattern initstate():ptr
 address WeldInitState
 comment "Initialize the state structure that is used to build a weld program";
 
@@ -37,6 +37,14 @@ pattern algebrathetaselect(b:bat[:any_1]
 address WeldAlgebraThetaselect2
 comment "algebra.thetaselect";
 
+pattern groupgroup(b:bat[:any_1], wstate:ptr) (groups:bat[:oid], 
extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.group"
+
+pattern groupgroup(b:bat[:any_1], g:bat[:oid], wstate:ptr) (groups:bat[:oid], 
extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.groupdone"
+
 pattern aggrsum(b:bat[:bte], wstate:ptr):bte
 address WeldAggrSum
 comment "aggr.sum";
diff --git a/monetdb5/modules/mal/mal_weld.mal.sh b/monetdb5/modules/mal/mal_weld.mal.sh
--- a/monetdb5/modules/mal/mal_weld.mal.sh
+++ b/monetdb5/modules/mal/mal_weld.mal.sh
@@ -19,7 +19,7 @@ alltypes=(bit ${numeric[@]} oid str)
 
 cat <<EOF
 
-command initstate():ptr
+pattern initstate():ptr
 address WeldInitState
 comment "Initialize the state structure that is used to build a weld program";
 
@@ -47,6 +47,14 @@ pattern algebrathetaselect(b:bat[:any_1]
 address WeldAlgebraThetaselect2
 comment "algebra.thetaselect";
 
+pattern groupgroup(b:bat[:any_1], wstate:ptr) (groups:bat[:oid], 
extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.group"
+
+pattern groupgroup(b:bat[:any_1], g:bat[:oid], wstate:ptr) (groups:bat[:oid], 
extents:bat[:oid], histo:bat[:lng])
+address WeldGroup;
+comment "group.groupdone"
+
 EOF
 
 for tp in ${numeric[@]}; do
diff --git a/monetdb5/optimizer/opt_prelude.c b/monetdb5/optimizer/opt_prelude.c
--- a/monetdb5/optimizer/opt_prelude.c
+++ b/monetdb5/optimizer/opt_prelude.c
@@ -307,6 +307,7 @@ str weldBatcalcAddRef;
 str weldBatcalcSubRef;
 str weldBatcalcMulRef;
 str weldGetResultRef;
+str weldGroupRef;
 str weldInitStateRef;
 str weldLanguagePassRef;
 str weldRef;
@@ -604,6 +605,7 @@ void optimizerInit(void)
        weldBatcalcSubRef = putName("batcalcsub");
        weldBatcalcMulRef = putName("batcalcmul");
        weldGetResultRef = putName("getresult");
+       weldGroupRef = putName("groupgroup");
        weldInitStateRef = putName("initstate");
        weldLanguagePassRef = putName("languagepass");
        weldRef = putName("weld");
diff --git a/monetdb5/optimizer/opt_prelude.h b/monetdb5/optimizer/opt_prelude.h
--- a/monetdb5/optimizer/opt_prelude.h
+++ b/monetdb5/optimizer/opt_prelude.h
@@ -315,6 +315,7 @@ mal_export  str weldBatcalcAddRef;
 mal_export  str weldBatcalcSubRef;
 mal_export  str weldBatcalcMulRef;
 mal_export  str weldGetResultRef;
+mal_export  str weldGroupRef;
 mal_export  str weldInitStateRef;
 mal_export  str weldLanguagePassRef;
 mal_export  str weldRef;
diff --git a/monetdb5/optimizer/opt_weld.c b/monetdb5/optimizer/opt_weld.c
--- a/monetdb5/optimizer/opt_weld.c
+++ b/monetdb5/optimizer/opt_weld.c
@@ -18,7 +18,7 @@
 #include "mal_instruction.h"
 #include "opt_weld.h"
 
-#define NUM_WELD_INSTR 8
+#define NUM_WELD_INSTR 12
 #define UNMARKED 0
 #define TEMP_MARK 1
 #define PERM_MARK 2
@@ -52,6 +52,10 @@ static void initWeldInstrs(void) {
        addWeldInstr(batcalcRef, minusRef, weldBatcalcSubRef);                  
         /* batcalc.- */
        addWeldInstr(batcalcRef, mulRef, weldBatcalcMulRef);                    
         /* batcalc.* */
        addWeldInstr(languageRef, passRef, weldLanguagePassRef);                
         /* language.pass */
+       addWeldInstr(groupRef, groupRef, weldGroupRef);                         
                 /* group.group*/
+       addWeldInstr(groupRef, subgroupRef, weldGroupRef);                      
                 /* group.subgroup */
+       addWeldInstr(groupRef, groupdoneRef, weldGroupRef);                     
                 /* group.groupdone */
+       addWeldInstr(groupRef, subgroupdoneRef, weldGroupRef);                  
         /* group.subgroupdone */
 }
 
 static str getWeldRef(InstrPtr instr) {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to