Changeset: c70e531e5970 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=c70e531e5970
Added Files:
    monetdb5/modules/mal/mosaic_hdr.h
    monetdb5/modules/mal/mosaic_none.h
    monetdb5/modules/mal/mosaic_rle.h
    monetdb5/modules/mal/mosaic_zone.c
Modified Files:
    monetdb5/modules/mal/Makefile.ag
    monetdb5/modules/mal/Tests/All
    monetdb5/modules/mal/mosaic.c
    monetdb5/modules/mal/mosaic.h
    monetdb5/modules/mal/mosaic_dict.c
    monetdb5/modules/mal/mosaic_hdr.c
    monetdb5/modules/mal/mosaic_none.c
    monetdb5/modules/mal/mosaic_rle.c
Branch: mosaic
Log Message:
Administrative reshuffling of the code Added the first part of zones and pre-filtering on oid candidate lists diffs (truncated from 1526 to 300 lines): diff --git a/monetdb5/modules/mal/Makefile.ag b/monetdb5/modules/mal/Makefile.ag --- a/monetdb5/modules/mal/Makefile.ag +++ b/monetdb5/modules/mal/Makefile.ag @@ -63,6 +63,11 @@ lib_mal = { sample.c sample.h \ json_util.c json_util.h \ mosaic.c mosaic.h \ + mosaic_hdr.c mosaic_hdr.h \ + mosaic_none.c mosaic_none.h \ + mosaic_rle.c mosaic_rle.h \ + mosaic_dict.c mosaic_dict.h \ + mosaic_zone.c mosaic_zone.h \ calc.c batcalc.c } diff --git a/monetdb5/modules/mal/Tests/All b/monetdb5/modules/mal/Tests/All --- a/monetdb5/modules/mal/Tests/All +++ b/monetdb5/modules/mal/Tests/All @@ -71,6 +71,7 @@ mosaic_none mosaic_rle mosaic_mix mosaic_dict +mosaic_zone mosaic_none_double diff --git a/monetdb5/modules/mal/mosaic.c b/monetdb5/modules/mal/mosaic.c --- a/monetdb5/modules/mal/mosaic.c +++ b/monetdb5/modules/mal/mosaic.c @@ -25,117 +25,13 @@ #include "monetdb_config.h" #include "mosaic.h" -#include "mtime.h" -#include "math.h" -#include "opt_prelude.h" -#include "algebra.h" +#include "mosaic_hdr.h" +#include "mosaic_none.h" +#include "mosaic_rle.h" +#include "mosaic_dict.h" +#include "mosaic_zone.h" -//#define _DEBUG_MOSAIC_ - -#define MOSAIC_VERSION 20140808 - -/* do not invest in compressing BATs smaller than this */ -#define MIN_INPUT_COUNT 1 - -/* The compressor kinds currently hardwired */ -#define MOSAIC_METHODS 6 -#define MOSAIC_NONE 0 // no compression at all -#define MOSAIC_RLE 1 // use run-length encoding -#define MOSAIC_DICT 2 // local dictionary encoding -#define MOSAIC_DELTA 3 // use delta encoding -#define MOSAIC_BITMAP 4 // use limited set of bitmaps -#define MOSAIC_ZONES 5 // zone map over non-compressed data -#define MOSAIC_EOL 6 // marker for the last block - -static char *filtername[]={"none","rle","dict","delta","bitmap","zones","EOL"}; - -//Compression should have a significant reduction to apply. 
-#define COMPRESS_THRESHOLD 50 //percent - -/* - * The header is reserved for meta information, e.g. oid indices. - * The block header encodes the information needed for the chunk decompressor - */ -#define MOSAICINDEX 4 //> 2 elements -typedef struct MOSAICHEADER{ - int version; - int top; - oid index[MOSAICINDEX]; - BUN offset[MOSAICINDEX]; -} * MosaicHdr; - -typedef struct MOSAICBLOCK{ - bte tag; // method applied in chunk - bte prop[7];// properties needed by compression scheme. - BUN cnt; // compression specific information -} *MosaicBlk; - -#define wordaligned(SZ) \ - ((SZ) + ((SZ) % sizeof(int)? sizeof(int) - ((SZ)%sizeof(int)) : 0)) - -#define MosaicHdrSize wordaligned(sizeof(struct MOSAICHEADER)) -#define MosaicBlkSize wordaligned(sizeof(struct MOSAICBLOCK)) - - -typedef struct MOSTASK{ - int type; // one of the permissible types - MosaicHdr hdr; // start of the destination heap - MosaicBlk blk; // current block header - char *dst; // write pointer into current compressed blocks - - BUN elm; // elements left to compress - char *src; // read pointer into source - - oid *lb, *rb; // Collected oids from operations - oid *cl; // candidate admin - lng n; // element count in candidate list - - BAT *lbat, *rbat; // for the joins, where we dont know their size upfront - - // collect compression statistics for the particular task - lng time[MOSAIC_METHODS]; - lng wins[MOSAIC_METHODS]; - lng elms[MOSAIC_METHODS]; -} *MOStask; - -/* we keep a condensed OID index anchored to the compressed blocks */ - -typedef struct MOSINDEX{ - lng offset; // header location within compressed heap - lng nullcnt; // number of nulls encountered - ValRecord low,hgh; // zone value markers for fix-length types -} *mosaicindex; - -/* Run through a column to produce a compressed version */ - -#ifdef _MSC_VER -#define nextafter _nextafter -float nextafterf(float x, float y); -#endif - -#define PREVVALUEbit(x) ((x) - 1) -#define PREVVALUEbte(x) ((x) - 1) -#define PREVVALUEsht(x) ((x) - 1) 
-#define PREVVALUEint(x) ((x) - 1) -#define PREVVALUElng(x) ((x) - 1) -#define PREVVALUEoid(x) ((x) - 1) -#define PREVVALUEflt(x) nextafterf((x), -GDK_flt_max) -#define PREVVALUEdbl(x) nextafter((x), -GDK_dbl_max) - -#define NEXTVALUEbit(x) ((x) + 1) -#define NEXTVALUEbte(x) ((x) + 1) -#define NEXTVALUEsht(x) ((x) + 1) -#define NEXTVALUEint(x) ((x) + 1) -#define NEXTVALUElng(x) ((x) + 1) -#define NEXTVALUEoid(x) ((x) + 1) -#define NEXTVALUEflt(x) nextafterf((x), GDK_flt_max) -#define NEXTVALUEdbl(x) nextafter((x), GDK_dbl_max) - -/* simple include the details of the hardwired compressors */ -#include "mosaic_hdr.c" -#include "mosaic_none.c" -#include "mosaic_rle.c" -#include "mosaic_dict.c" +static char *filtername[]={"none","rle","dict","delta","bitmap","zone","EOL"}; static void MOSinit(MOStask task, BAT *b){ @@ -147,16 +43,6 @@ MOSinit(MOStask task, BAT *b){ task->dst = base + MosaicBlkSize; } -/* -static void -MOSclose(MOStask task){ - if( task->blk->cnt == 0){ - task->dst -= MosaicBlkSize; - return; - } -} -*/ - static void MOSdumpTask(Client cntxt,MOStask task) { @@ -203,6 +89,10 @@ MOSdumpInternal(Client cntxt, BAT *b){ MOSdump_dict(cntxt,task); MOSskip_dict(task); break; + case MOSAIC_ZONE: + MOSdump_zone(cntxt,task); + MOSskip_zone(task); + break; default: assert(0); } } @@ -357,10 +247,10 @@ MOScompressInternal(Client cntxt, int *r cand = MOSAIC_NONE; ch =0; chunksize = 1; - // collect the opportunities for compression + + // select candidate amongst those if (filter[MOSAIC_RLE]) ch = MOSestimate_rle(cntxt,task); - // select candidate amongst those if ( ch > chunksize){ cand = MOSAIC_RLE; chunksize = ch; @@ -371,6 +261,12 @@ MOScompressInternal(Client cntxt, int *r cand = MOSAIC_DICT; chunksize = ch; } + if (filter[MOSAIC_ZONE]) + ch = MOSestimate_zone(cntxt,task); + if ( ch > chunksize){ + cand = MOSAIC_ZONE; + chunksize = ch; + } // apply the compression to a chunk switch(cand){ @@ -412,12 +308,15 @@ MOScompressInternal(Client cntxt, int *r 
task->blk->cnt = 0; task->dst = ((char*) task->blk)+ MosaicBlkSize; break; + case MOSAIC_ZONE: + MOScompress_zone(cntxt,task); + break; default : // continue to use the last block header. MOScompress_none(cntxt,task); } } - if( task->blk->tag == MOSAIC_NONE && task->blk->cnt){ + if( (task->blk->tag == MOSAIC_NONE || task->blk->tag == MOSAIC_ZONE) && task->blk->cnt){ MOSupdateHeader(cntxt,task); MOSadvance_none(task); task->dst = ((char*) task->blk)+ MosaicBlkSize; @@ -522,6 +421,10 @@ MOSdecompressInternal(Client cntxt, int MOSdecompress_rle(cntxt,task); MOSskip_rle(task); break; + case MOSAIC_ZONE: + MOSdecompress_zone(cntxt,task); + MOSskip_zone(task); + break; default: assert(0); } } diff --git a/monetdb5/modules/mal/mosaic.h b/monetdb5/modules/mal/mosaic.h --- a/monetdb5/modules/mal/mosaic.h +++ b/monetdb5/modules/mal/mosaic.h @@ -26,25 +26,141 @@ #include "mal_exception.h" #include "mal_function.h" +#include "mtime.h" +#include "math.h" +#include "opt_prelude.h" +#include "algebra.h" + +//#define _DEBUG_MOSAIC_ + +#define MOSAIC_VERSION 20140808 + +/* do not invest in compressing BATs smaller than this */ +#define MIN_INPUT_COUNT 1 + +/* The compressor kinds currently hardwired */ +#define MOSAIC_METHODS 6 +#define MOSAIC_NONE 0 // no compression at all +#define MOSAIC_RLE 1 // use run-length encoding +#define MOSAIC_DICT 2 // local dictionary encoding +#define MOSAIC_DELTA 3 // use delta encoding +#define MOSAIC_BITMAP 4 // use limited set of bitmaps +#define MOSAIC_ZONE 5 // adaptive zone map over non-compressed data +#define MOSAIC_EOL 6 // marker for the last block + +//Compression should have a significant reduction to apply. +#define COMPRESS_THRESHOLD 50 //percent + +/* + * The header is reserved for meta information, e.g. oid indices. 
+ * The block header encodes the information needed for the chunk decompressor + */ +#define MOSAICINDEX 4 //> 2 elements +typedef struct MOSAICHEADER{ + int version; + int top; + oid index[MOSAICINDEX]; + BUN offset[MOSAICINDEX]; +} * MosaicHdr; + +typedef struct MOSAICBLOCK{ + bte tag; // method applied in chunk + bte prop[7];// properties needed by compression scheme. + BUN cnt; // compression specific information +} *MosaicBlk; + +#define wordaligned(SZ) \ + ((SZ) + ((SZ) % sizeof(int)? sizeof(int) - ((SZ)%sizeof(int)) : 0)) + +#define MosaicHdrSize wordaligned(sizeof(struct MOSAICHEADER)) +#define MosaicBlkSize wordaligned(sizeof(struct MOSAICBLOCK)) + + +typedef struct MOSTASK{ + int type; // one of the permissible types + MosaicHdr hdr; // start of the destination heap + MosaicBlk blk; // current block header + char *dst; // write pointer into current compressed blocks + + BUN elm; // elements left to compress + char *src; // read pointer into source _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list