Changeset: 4c6cbebc02f9 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4c6cbebc02f9
Modified Files:
	monetdb5/modules/mal/mosaic.c
	monetdb5/modules/mal/mosaic_variance.c
Branch: mosaic
Log Message:
Enable variance dictionary compression diffs (267 lines): diff --git a/monetdb5/modules/mal/mosaic.c b/monetdb5/modules/mal/mosaic.c --- a/monetdb5/modules/mal/mosaic.c +++ b/monetdb5/modules/mal/mosaic.c @@ -339,14 +339,14 @@ MOScompressInternal(Client cntxt, int *r factor = fac; } } -/* if ( filter[MOSAIC_VARIANCE]){ fac = MOSestimate_variance(cntxt,task); if (fac > factor){ - cand = MOSAIC_DICT; + cand = MOSAIC_VARIANCE; factor = fac; } } +/* if ( filter[MOSAIC_ZONE]){ fac = MOSestimate_zone(cntxt,task); if (fac > factor){ diff --git a/monetdb5/modules/mal/mosaic_variance.c b/monetdb5/modules/mal/mosaic_variance.c --- a/monetdb5/modules/mal/mosaic_variance.c +++ b/monetdb5/modules/mal/mosaic_variance.c @@ -29,6 +29,11 @@ #include "mosaic_dictionary.h" #include "mosaic_variance.h" +/* + * The dictionary size should be limited or a binary search is required + */ +static int vardictsize=8; + void MOSadvance_variance(Client cntxt, MOStask task) { @@ -37,23 +42,23 @@ MOSadvance_variance(Client cntxt, MOStas task->start += MOSgetCnt(task->blk); switch(ATOMstorage(task->type)){ //case TYPE_bte: case TYPE_bit: no compressionachievable - case TYPE_sht: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(sht)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),sht)); break; - case TYPE_int: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(int)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),int)); break; - case TYPE_oid: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(oid)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),oid)); break; - case TYPE_lng: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(lng)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),lng)); break; - case TYPE_wrd: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(wrd)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),wrd)); break; - case TYPE_flt: 
task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(flt)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),flt)); break; - case TYPE_dbl: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(dbl)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),dbl)); break; + case TYPE_sht: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(sht)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),sht)); break; + case TYPE_int: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(int)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),int)); break; + case TYPE_oid: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(oid)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),oid)); break; + case TYPE_lng: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(lng)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),lng)); break; + case TYPE_wrd: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(wrd)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),wrd)); break; + case TYPE_flt: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(flt)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),flt)); break; + case TYPE_dbl: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(dbl)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),dbl)); break; #ifdef HAVE_HGE - case TYPE_hge: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(hge)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),hge)); break; + case TYPE_hge: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(hge)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),hge)); break; #endif case TYPE_str: // we only have to look at the index width, not the values switch(task->b->T->width){ - case 1: task->blk = 
(MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(bte)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),bte)); break; - case 2: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(sht)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),sht)); break; - case 4: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(int)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),int)); break; - case 8: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + dictsize * sizeof(lng)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),lng)); break; + case 1: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(bte)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),bte)); break; + case 2: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(sht)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),sht)); break; + case 4: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(int)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),int)); break; + case 8: task->blk = (MosaicBlk)( ((char*)task->blk) + 2* MosaicBlkSize + vardictsize * sizeof(lng)+ wordaligned(sizeof(bte) * MOSgetCnt(task->blk),lng)); break; } break; } @@ -112,7 +117,7 @@ MOSskip_variance(Client cntxt, MOStask t #define estimateVariance(TPE)\ { TPE *v = (TPE*)task->src, val = *v++, delta;\ TPE *dict = (TPE*)((char*)task->dst + 4 * MosaicBlkSize);\ - task->dst = ((char*) dict)+ sizeof(TPE)*dictsize;\ + task->dst = ((char*) dict)+ sizeof(TPE)*vardictsize;\ dict[0]= val;\ *size = *size+1;\ for(i =0; i<task->elm; i++, val++){\ @@ -121,14 +126,14 @@ MOSskip_variance(Client cntxt, MOStask t for(j= 0; j< *size; j++)\ if( dict[j] == delta) {cnt++;break;}\ if ( j == *size){\ - if ( *size == dictsize)\ + if ( *size == vardictsize)\ break;\ dict[j] = delta;\ *size= *size+1;\ cnt++;\ }\ }\ - if(i) factor = (flt) ((int)i * sizeof(int)) / (3 * MosaicBlkSize + sizeof(int) 
* dictsize +i);\ + if(i) factor = (flt) ((int)i * sizeof(int)) / (3 * MosaicBlkSize + sizeof(int) * vardictsize +i);\ } // calculate the expected reduction using dictionary in terms of elements compressed @@ -156,7 +161,7 @@ MOSestimate_variance(Client cntxt, MOSta case TYPE_int: { int *v = (int*)task->src, val = *v++,delta; int *dict = (int*)((char*)task->dst + 3 * MosaicBlkSize); - task->dst = ((char*) dict)+ sizeof(int)*dictsize; + task->dst = ((char*) dict)+ sizeof(int)*vardictsize; dict[0]= val; *size = *size+1; for(i =0; i<task->elm; i++, v++){ @@ -165,14 +170,14 @@ MOSestimate_variance(Client cntxt, MOSta for(j= 0; j< *size; j++) if( dict[j] == delta) {cnt++;break;} if ( j == *size){ - if ( *size == dictsize) + if ( *size == vardictsize) break; dict[j] = delta; *size= *size+1; cnt++; } } - if(i) factor = (flt) ((int)i * sizeof(int)) / (3 * MosaicBlkSize + sizeof(int) * dictsize +i); + if(i) factor = (flt) ((int)i * sizeof(int)) / (3 * MosaicBlkSize + sizeof(int) * vardictsize +i); } break; case TYPE_str: @@ -195,7 +200,7 @@ MOSestimate_variance(Client cntxt, MOSta { TPE *v = (TPE*)task->src, val=*v++, delta;\ TPE *dict = (TPE*)((char*)task->blk+ 2 * MosaicBlkSize);\ BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;\ - task->dst = ((char*) dict)+ sizeof(TPE)*dictsize;\ + task->dst = ((char*) dict)+ sizeof(TPE)*vardictsize;\ dict[0]= val;\ *size = *size+1;\ MOSincCnt(blk,1);\ @@ -209,7 +214,7 @@ MOSestimate_variance(Client cntxt, MOSta break;\ }\ if ( j == *size){\ - if ( *size == dictsize){\ + if ( *size == vardictsize){\ task->dst += wordaligned(MOSgetCnt(blk) %2,TPE);\ break;\ }\ @@ -249,7 +254,7 @@ MOScompress_variance(Client cntxt, MOSta { lng *v = (lng*)task->src, val = *v++, delta; lng *dict = (lng*)((char*)task->blk+ 2 * MosaicBlkSize); BUN limit = task->elm > MOSlimit()? 
MOSlimit(): task->elm; - task->dst = ((char*) dict)+ sizeof(lng)*dictsize; + task->dst = ((char*) dict)+ sizeof(lng)*vardictsize; dict[0]= val; *size = *size+1; MOSincCnt(blk,1); @@ -263,7 +268,7 @@ MOScompress_variance(Client cntxt, MOSta break; } if ( j == *size){ - if ( *size == dictsize){ + if ( *size == vardictsize){ // align on word boundary task->dst += wordaligned(MOSgetCnt(blk) %2,lng); break; @@ -293,7 +298,7 @@ MOScompress_variance(Client cntxt, MOSta // the inverse operator, extend the src #define VARDICTdecompress(TPE)\ -{ bte *idx = (bte*)(compressed + dictsize * sizeof(TPE));\ +{ bte *idx = (bte*)(compressed + vardictsize * sizeof(TPE));\ TPE *dict = (TPE*) compressed,val = dict[0];\ BUN lim = MOSgetCnt(blk);\ ((TPE*)task->src)[0] = val;\ @@ -324,7 +329,7 @@ MOSdecompress_variance(Client cntxt, MOS case TYPE_hge: VARDICTdecompress(hge); break; #endif case TYPE_int: - { bte *idx = (bte*)(compressed + dictsize * sizeof(int)); + { bte *idx = (bte*)(compressed + vardictsize * sizeof(int)); int *dict = (int*) compressed,val= dict[0]; BUN lim = MOSgetCnt(blk); ((int*)task->src)[0] = val; @@ -351,7 +356,7 @@ MOSdecompress_variance(Client cntxt, MOS #define subselect_variance(TPE) {\ TPE *dict= (TPE*) (((char*) task->blk) + 2 * MosaicBlkSize ),val= dict[0];\ - bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + dictsize * sizeof(TPE));\ + bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + vardictsize * sizeof(TPE));\ if( !*anti){\ if( *(TPE*) low == TPE##_nil && *(TPE*) hgh == TPE##_nil){\ for( ; first < last; first++, idx++){\ @@ -445,7 +450,7 @@ MOSsubselect_variance(Client cntxt, MOS case TYPE_int: // Expanded MOSselect_variance for debugging { int *dict= (int*) (((char*) task->blk) + 2 * MosaicBlkSize) ,val= dict[0]; - bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + dictsize * sizeof(int)); + bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + vardictsize * sizeof(int)); if( !*anti){ if( *(int*) low == int_nil && 
*(int*) hgh == int_nil){ @@ -516,7 +521,7 @@ MOSsubselect_variance(Client cntxt, MOS subselect_variance(daytime); if( task->type == TYPE_timestamp) { lng *dict= (lng*) (((char*) task->blk) + 2 * MosaicBlkSize) ,val =dict[0]; - bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + dictsize * sizeof(lng)); + bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + vardictsize * sizeof(lng)); int lownil = timestamp_isnil(*(timestamp*)low); int hghnil = timestamp_isnil(*(timestamp*)hgh); @@ -598,7 +603,7 @@ MOSsubselect_variance(Client cntxt, MOS #define thetasubselect_variance(TPE)\ { TPE low,hgh, *dict,w;\ - bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + dictsize * sizeof(lng));\ + bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + vardictsize * sizeof(lng));\ low= hgh = TPE##_nil;\ if ( strcmp(oper,"<") == 0){\ hgh= *(TPE*) val;\ @@ -668,7 +673,7 @@ MOSthetasubselect_variance(Client cntxt, case TYPE_int: { int low,hgh; int *dict= (int*) (((char*) task->blk) + 2 * MosaicBlkSize ),v=dict[0]; - bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + dictsize * sizeof(int)); + bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + vardictsize * sizeof(int)); low= hgh = int_nil; if ( strcmp(oper,"<") == 0){ hgh= *(int*) val; @@ -716,7 +721,7 @@ MOSthetasubselect_variance(Client cntxt, default: if( task->type == TYPE_timestamp){ { lng *dict= (lng*) (((char*) task->blk) + 2 * MosaicBlkSize ), v = dict[0]; - bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + dictsize * sizeof(lng)); + bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + vardictsize * sizeof(lng)); lng low,hgh; low= hgh = int_nil; @@ -763,7 +768,7 @@ MOSthetasubselect_variance(Client cntxt, #define leftfetchjoin_variance(TPE)\ { TPE *v;\ TPE *dict= (TPE*) (((char*) task->blk) + 2 * MosaicBlkSize ),val = dict[0];\ - bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + dictsize * sizeof(TPE));\ + bte *idx = (bte*) (((char*) task->blk) + 2 
* MosaicBlkSize + vardictsize * sizeof(TPE));\ v= (TPE*) task->src;\ for(; first < last; first++, val+= dict[*idx++]){\ MOSskipit();\ @@ -795,7 +800,7 @@ MOSleftfetchjoin_variance(Client cntxt, case TYPE_int: { int *v; int *dict= (int*) (((char*) task->blk) + 2 * MosaicBlkSize ),val = dict[0]; - bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + dictsize * sizeof(int)); + bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + vardictsize * sizeof(int)); v= (int*) task->src; for(; first < last; first++, val+=dict[*idx++]){ MOSskipit(); @@ -821,7 +826,7 @@ MOSleftfetchjoin_variance(Client cntxt, #define join_variance(TPE)\ { TPE *w;\ TPE *dict= (TPE*) (((char*) task->blk) + 2 * MosaicBlkSize ),val=dict[0];\ - bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + dictsize * sizeof(int));\ + bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + vardictsize * sizeof(int));\ for(oo= (oid) first; first < last; first++, val+=dict[*idx++], oo++){\ w = (TPE*) task->src;\ for(n = task->elm, o = 0; n -- > 0; w++,o++)\ @@ -856,7 +861,7 @@ MOSjoin_variance(Client cntxt, MOStask case TYPE_int: { int *w; int *dict= (int*) (((char*) task->blk) + 2 * MosaicBlkSize ), val=dict[0]; - bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + dictsize * sizeof(int)); + bte *idx = (bte*) (((char*) task->blk) + 2 * MosaicBlkSize + vardictsize * sizeof(int)); for(oo= (oid) first; first < last; first++, val+= dict[*idx++], oo++){ w = (int*) task->src; for(n = task->elm, o = 0; n -- > 0; w++,o++) _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list