Changeset: 669b4dea64ad for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/669b4dea64ad
Branch: default
Log Message:

Merge dict branch into default.


diffs (truncated from 3484 to 300 lines):

diff --git a/clients/Tests/MAL-signatures.stable.out 
b/clients/Tests/MAL-signatures.stable.out
--- a/clients/Tests/MAL-signatures.stable.out
+++ b/clients/Tests/MAL-signatures.stable.out
@@ -8449,6 +8449,15 @@
 [ "color",     "value",        "command color.value(X_0:color):flt ",  
"CLRvalue;",    "Extracts value component from a color atom"    ]
 [ "color",     "value",        "command color.value(X_0:color):int ",  
"CLRvalueInt;", "Extracts value component from a color atom"    ]
 [ "color",     "ycc",  "command color.ycc(X_0:int, X_1:int, X_2:int):color ",  
"CLRycc;",      "Converts an YCC triplets to a color atom"      ]
+[ "dict",      "compress",     "pattern dict.compress(X_0:bat[:any_1]) 
(X_1:bat[:any], X_2:bat[:any_1]) ",     "DICTcompress;",        "dict compress 
a bat"   ]
+[ "dict",      "compress",     "pattern dict.compress(X_0:str, X_1:str, 
X_2:str):void ",       "DICTcompress_col;",    "compress a sql column" ]
+[ "dict",      "compress",     "pattern dict.compress(X_0:str, X_1:str, 
X_2:str, X_3:bit):void ",      "DICTcompress_col;",    "compress a sql column" ]
+[ "dict",      "convert",      "pattern 
dict.convert(X_0:bat[:any]):bat[:any_1] ",     "DICTconvert;", "convert 
candidate list into compressed offsets"        ]
+[ "dict",      "decompress",   "pattern dict.decompress(X_0:bat[:any], 
X_1:bat[:any_1]):bat[:any_1] ", "DICTdecompress;",      "decompress a 
dictionary compressed (sub)column"        ]
+[ "dict",      "join", "pattern dict.join(X_0:bat[:any], X_1:bat[:any_1], 
X_2:bat[:any], X_3:bat[:any_1], X_4:bat[:oid], X_5:bat[:oid], X_6:bit, X_7:lng) 
(X_8:bat[:oid], X_9:bat[:oid]) ",    "DICTjoin;",    "join 2 dictionaries"   ]
+[ "dict",      "renumber",     "pattern dict.renumber(X_0:bat[:any_1], 
X_1:bat[:any_1]):bat[:any_1] ", "DICTrenumber;",        "renumber offsets"      
]
+[ "dict",      "select",       "pattern dict.select(X_0:bat[:any], 
X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:any_1, X_5:bit, X_6:bit, 
X_7:bit, X_8:bit):bat[:oid] ",      "DICTselect;",  "value - range select on a 
dictionary"  ]
+[ "dict",      "thetaselect",  "pattern dict.thetaselect(X_0:bat[:any], 
X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:str):bat[:oid] ",       
"DICTthetaselect;",     "thetaselect on a dictionary"   ]
 [ "factories", "getArrival",   "command factories.getArrival():bat[:timestamp] 
",      "FCTgetArrival;",       "Retrieve the time stamp the last call was 
made."       ]
 [ "factories", "getCaller",    "command factories.getCaller():int ",   
"FCTgetCaller;",        "Retrieve the unique identity of the factory caller."   
]
 [ "factories", "getDeparture", "command 
factories.getDeparture():bat[:timestamp] ",    "FCTgetDeparture;",     
"Retrieve the time stamp the last answer was returned." ]
@@ -8461,6 +8470,8 @@
 [ "fits",      "listdir",      "unsafe pattern fits.listdir(X_0:str):void ",   
"FITSdir;",     "Attach all FITS files in the directory"        ]
 [ "fits",      "listdirpattern",       "unsafe pattern 
fits.listdirpattern(X_0:str, X_1:str):void ",   "FITSdirpat;",  "Attach all 
FITS file in the directory, giving a pattern"       ]
 [ "fits",      "load", "unsafe pattern fits.load(X_0:str):void ",      
"FITSloadTable;",       "Load a FITS table from an attached file"       ]
+[ "for",       "compress",     "pattern for.compress(X_0:str, X_1:str, 
X_2:str):void ",        "FORcompress_col;",     "compress a sql column"]
+[ "for",       "decompress",   "pattern for.decompress(X_0:bat[:any], 
X_1:any_1):bat[:any_1] ",        "FORdecompress;",       "decompress a for 
compressed (sub)column"       ]
 [ "generator", "join", "pattern generator.join(X_0:bat[:bte], X_1:bat[:bte]) 
(X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;",   ""      ]
 [ "generator", "join", "pattern generator.join(X_0:bat[:dbl], X_1:bat[:dbl]) 
(X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;",   "Overloaded join 
operation"     ]
 [ "generator", "join", "pattern generator.join(X_0:bat[:flt], X_1:bat[:flt]) 
(X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;",   ""      ]
@@ -9100,11 +9111,15 @@
 [ "optimizer", "default_pipe", "function optimizer.default_pipe():void;",      
"",     ""      ]
 [ "optimizer", "defaultfast",  "pattern optimizer.defaultfast():str ", 
"OPTwrapper;",  ""      ]
 [ "optimizer", "defaultfast",  "pattern optimizer.defaultfast(X_0:str, 
X_1:str):str ", "OPTwrapper;",  "Fast compound default optimizer pipe"  ]
+[ "optimizer", "dict", "pattern optimizer.dict():str ",        "OPTwrapper;",  
""      ]
+[ "optimizer", "dict", "pattern optimizer.dict(X_0:str, X_1:str):str ",        
"OPTwrapper;",  "Push dict decompress down"     ]
 [ "optimizer", "emptybind",    "pattern optimizer.emptybind():str ",   
"OPTwrapper;",  ""      ]
 [ "optimizer", "emptybind",    "pattern optimizer.emptybind(X_0:str, 
X_1:str):str ",   "OPTwrapper;",  "Evaluate empty set expressions"        ]
 [ "optimizer", "epilogue",     "command optimizer.epilogue():void ",   
"optimizer_epilogue;",  "release the resources held by the optimizer module"    
]
 [ "optimizer", "evaluate",     "pattern optimizer.evaluate():str ",    
"OPTwrapper;",  ""      ]
 [ "optimizer", "evaluate",     "pattern optimizer.evaluate(X_0:str, 
X_1:str):str ",    "OPTwrapper;",  "Evaluate constant expressions once"    ]
+[ "optimizer", "for",  "pattern optimizer.for():str ", "OPTwrapper;",  ""      
]
+[ "optimizer", "for",  "pattern optimizer.for(X_0:str, X_1:str):str ", 
"OPTwrapper;",  "Push for decompress down"      ]
 [ "optimizer", "garbageCollector",     "pattern 
optimizer.garbageCollector():str ",    "OPTwrapper;",  ""      ]
 [ "optimizer", "garbageCollector",     "pattern 
optimizer.garbageCollector(X_0:str, X_1:str):str ",    "OPTwrapper;",  "Garbage 
collector optimizer"   ]
 [ "optimizer", "generator",    "pattern optimizer.generator():str ",   
"OPTwrapper;",  ""      ]
diff --git a/clients/Tests/MAL-signatures.stable.out.int128 
b/clients/Tests/MAL-signatures.stable.out.int128
--- a/clients/Tests/MAL-signatures.stable.out.int128
+++ b/clients/Tests/MAL-signatures.stable.out.int128
@@ -11734,6 +11734,15 @@
 [ "color",     "value",        "command color.value(X_0:color):flt ",  
"CLRvalue;",    "Extracts value component from a color atom"    ]
 [ "color",     "value",        "command color.value(X_0:color):int ",  
"CLRvalueInt;", "Extracts value component from a color atom"    ]
 [ "color",     "ycc",  "command color.ycc(X_0:int, X_1:int, X_2:int):color ",  
"CLRycc;",      "Converts an YCC triplets to a color atom"      ]
+[ "dict",      "compress",     "pattern dict.compress(X_0:bat[:any_1]) 
(X_1:bat[:any], X_2:bat[:any_1]) ",     "DICTcompress;",        "dict compress 
a bat"   ]
+[ "dict",      "compress",     "pattern dict.compress(X_0:str, X_1:str, 
X_2:str):void ",       "DICTcompress_col;",    "compress a sql column" ]
+[ "dict",      "compress",     "pattern dict.compress(X_0:str, X_1:str, 
X_2:str, X_3:bit):void ",      "DICTcompress_col;",    "compress a sql column" ]
+[ "dict",      "convert",      "pattern 
dict.convert(X_0:bat[:any]):bat[:any_1] ",     "DICTconvert;", "convert 
candidate list into compressed offsets"        ]
+[ "dict",      "decompress",   "pattern dict.decompress(X_0:bat[:any], 
X_1:bat[:any_1]):bat[:any_1] ", "DICTdecompress;",      "decompress a 
dictionary compressed (sub)column"        ]
+[ "dict",      "join", "pattern dict.join(X_0:bat[:any], X_1:bat[:any_1], 
X_2:bat[:any], X_3:bat[:any_1], X_4:bat[:oid], X_5:bat[:oid], X_6:bit, X_7:lng) 
(X_8:bat[:oid], X_9:bat[:oid]) ",    "DICTjoin;",    "join 2 dictionaries"   ]
+[ "dict",      "renumber",     "pattern dict.renumber(X_0:bat[:any_1], 
X_1:bat[:any_1]):bat[:any_1] ", "DICTrenumber;",        "renumber offsets"      
]
+[ "dict",      "select",       "pattern dict.select(X_0:bat[:any], 
X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:any_1, X_5:bit, X_6:bit, 
X_7:bit, X_8:bit):bat[:oid] ",      "DICTselect;",  "value - range select on a 
dictionary"  ]
+[ "dict",      "thetaselect",  "pattern dict.thetaselect(X_0:bat[:any], 
X_1:bat[:oid], X_2:bat[:any_1], X_3:any_1, X_4:str):bat[:oid] ",       
"DICTthetaselect;",     "thetaselect on a dictionary"   ]
 [ "factories", "getArrival",   "command factories.getArrival():bat[:timestamp] 
",      "FCTgetArrival;",       "Retrieve the time stamp the last call was 
made."       ]
 [ "factories", "getCaller",    "command factories.getCaller():int ",   
"FCTgetCaller;",        "Retrieve the unique identity of the factory caller."   
]
 [ "factories", "getDeparture", "command 
factories.getDeparture():bat[:timestamp] ",    "FCTgetDeparture;",     
"Retrieve the time stamp the last answer was returned." ]
@@ -11746,6 +11755,8 @@
 [ "fits",      "listdir",      "pattern fits.listdir(X_0:str):void ",  
"FITSdir;",     "Attach all FITS files in the directory"        ]
 [ "fits",      "listdirpattern",       "pattern fits.listdirpattern(X_0:str, 
X_1:str):void ",  "FITSdirpat;",  "Attach all FITS file in the directory, 
giving a pattern"       ]
 [ "fits",      "load", "pattern fits.load(X_0:str):void ",     
"FITSloadTable;",       "Load a FITS table from an attached file"       ]
+[ "for",       "compress",     "pattern for.compress(X_0:str, X_1:str, 
X_2:str):void ",        "FORcompress_col;",     "compress a sql column"]
+[ "for",       "decompress",   "pattern for.decompress(X_0:bat[:any], 
X_1:any_1):bat[:any_1] ",        "FORdecompress;",       "decompress a for 
compressed (sub)column"       ]
 [ "generator", "join", "pattern generator.join(X_0:bat[:bte], X_1:bat[:bte]) 
(X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;",   ""      ]
 [ "generator", "join", "pattern generator.join(X_0:bat[:dbl], X_1:bat[:dbl]) 
(X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;",   "Overloaded join 
operation"     ]
 [ "generator", "join", "pattern generator.join(X_0:bat[:flt], X_1:bat[:flt]) 
(X_2:bat[:oid], X_3:bat[:oid]) ", "VLTgenerator_join;",   ""      ]
@@ -12400,11 +12411,15 @@
 [ "optimizer", "default_pipe", "function optimizer.default_pipe():void;",      
"",     ""      ]
 [ "optimizer", "defaultfast",  "pattern optimizer.defaultfast():str ", 
"OPTwrapper;",  ""      ]
 [ "optimizer", "defaultfast",  "pattern optimizer.defaultfast(X_0:str, 
X_1:str):str ", "OPTwrapper;",  "Fast compound default optimizer pipe"  ]
+[ "optimizer", "dict", "pattern optimizer.dict():str ",        "OPTwrapper;",  
""      ]
+[ "optimizer", "dict", "pattern optimizer.dict(X_0:str, X_1:str):str ",        
"OPTwrapper;",  "Push dict decompress down"     ]
 [ "optimizer", "emptybind",    "pattern optimizer.emptybind():str ",   
"OPTwrapper;",  ""      ]
 [ "optimizer", "emptybind",    "pattern optimizer.emptybind(X_0:str, 
X_1:str):str ",   "OPTwrapper;",  "Evaluate empty set expressions"        ]
 [ "optimizer", "epilogue",     "command optimizer.epilogue():void ",   
"optimizer_epilogue;",  "release the resources held by the optimizer module"    
]
 [ "optimizer", "evaluate",     "pattern optimizer.evaluate():str ",    
"OPTwrapper;",  ""      ]
 [ "optimizer", "evaluate",     "pattern optimizer.evaluate(X_0:str, 
X_1:str):str ",    "OPTwrapper;",  "Evaluate constant expressions once"    ]
+[ "optimizer", "for",  "pattern optimizer.for():str ", "OPTwrapper;",  ""      
]
+[ "optimizer", "for",  "pattern optimizer.for(X_0:str, X_1:str):str ", 
"OPTwrapper;",  "Push for decompress down"      ]
 [ "optimizer", "garbageCollector",     "pattern 
optimizer.garbageCollector():str ",    "OPTwrapper;",  ""      ]
 [ "optimizer", "garbageCollector",     "pattern 
optimizer.garbageCollector(X_0:str, X_1:str):str ",    "OPTwrapper;",  "Garbage 
collector optimizer"   ]
 [ "optimizer", "generator",    "pattern optimizer.generator():str ",   
"OPTwrapper;",  ""      ]
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -901,6 +901,7 @@ const char *columnRef;
 const char *comment_onRef;
 const char *commitRef;
 str compileString(Symbol *fcn, Client c, str s);
+const char *compressRef;
 char *concatErrors(char *err1, const char *err2) __attribute__((__nonnull__(1, 
2))) __attribute__((__returns_nonnull__));
 const char *connectRef;
 const char *contextRef;
@@ -934,6 +935,7 @@ const char *dateRef;
 const char *dblRef;
 str deblockdataflow(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci);
 void debugFunction(stream *fd, MalBlkPtr mb, MalStkPtr stk, int flg, int 
first, int size);
+const char *decompressRef;
 int defConstant(MalBlkPtr mb, int type, ValPtr cst);
 const char *defineRef;
 void delArgument(InstrPtr p, int varid);
@@ -944,6 +946,7 @@ const char *deltaRef;
 const char *dense_rankRef;
 const char *deregisterRef;
 malType destinationType(MalBlkPtr mb, InstrPtr p);
+const char *dictRef;
 const char *diffRef;
 const char *diffcandRef;
 const char *differenceRef;
@@ -985,6 +988,7 @@ const char *first_valueRef;
 const char *firstnRef;
 Module fixModule(const char *nme);
 int fndConstant(MalBlkPtr mb, const ValRecord *cst, int depth);
+const char *forRef;
 void freeException(str);
 void freeInstruction(InstrPtr p);
 void freeMalBlk(MalBlkPtr mb);
@@ -1297,6 +1301,7 @@ const char *rename_columnRef;
 const char *rename_schemaRef;
 const char *rename_tableRef;
 const char *rename_userRef;
+const char *renumberRef;
 const char *replaceRef;
 const char *replicatorRef;
 void resetMalBlk(MalBlkPtr mb);
diff --git a/gdk/gdk_group.c b/gdk/gdk_group.c
--- a/gdk/gdk_group.c
+++ b/gdk/gdk_group.c
@@ -598,6 +598,54 @@ ctz(oid x)
        /* EQUAL  */    cmp(v, BUNtail(bi, hb)) == 0            \
        )
 
+#define GRP_small_values(BG, BV, GV)                                   \
+       do {                                                            \
+               uint##BG##_t sgrps[1 << BG];                            \
+               const uint##BV##_t *restrict w = (const uint##BV##_t *) 
bi.base; \
+               uint##BG##_t v;                                         \
+               memset(sgrps, 0xFF, sizeof(sgrps));                     \
+               if (histo)                                              \
+                       memset(cnts, 0, maxgrps * sizeof(lng));         \
+               ngrp = 0;                                               \
+               gn->tsorted = true;                                     \
+               if (ci.tpe == cand_dense) {                             \
+                       TIMEOUT_LOOP_IDX(r, cnt, timeoffset) {          \
+                               oid o = canditer_next_dense(&ci);       \
+                               p = o - b->hseqbase;                    \
+                               uint##BG##_t x = GV;                    \
+                               if ((v = sgrps[x]) == (uint##BG##_t) ~0 && ngrp 
< (1 << BG)) { \
+                                       sgrps[x] = v = (uint##BG##_t) ngrp++; \
+                                       maxgrppos = r;                  \
+                                       if (extents)                    \
+                                               exts[v] = o;            \
+                               }                                       \
+                               ngrps[r] = v;                           \
+                               if (r > 0 && v < ngrps[r - 1])          \
+                                       gn->tsorted = false;            \
+                               if (histo)                              \
+                                       cnts[v]++;                      \
+                       }                                               \
+               } else {                                                \
+                       TIMEOUT_LOOP_IDX(r, cnt, timeoffset) {          \
+                               oid o = canditer_next(&ci);             \
+                               p = o - b->hseqbase;                    \
+                               uint##BG##_t x = GV;                    \
+                               if ((v = sgrps[x]) == (uint##BG##_t) ~0 && ngrp 
< (1 << BG)) { \
+                                       sgrps[x] = v = (uint##BG##_t) ngrp++; \
+                                       maxgrppos = r;                  \
+                                       if (extents)                    \
+                                               exts[v] = o;            \
+                               }                                       \
+                               ngrps[r] = v;                           \
+                               if (r > 0 && v < ngrps[r - 1])          \
+                                       gn->tsorted = false;            \
+                               if (histo)                              \
+                                       cnts[v]++;                      \
+                       }                                               \
+               }                                                       \
+               TIMEOUT_CHECK(timeoffset,                               \
+                             GOTO_LABEL_TIMEOUT_HANDLER(error));       \
+       } while (0)
 
 gdk_return
 BATgroup_internal(BAT **groups, BAT **extents, BAT **histo,
@@ -691,9 +739,8 @@ BATgroup_internal(BAT **groups, BAT **ex
        }
        assert(!BATtdense(b));
        if (g) {
-               if (BATtdense(g))
-                       maxgrp = g->tseqbase + BATcount(g);
-               else if (BATtordered(g))
+               assert(!BATtdense(g));
+               if (BATtordered(g))
                        maxgrp = * (oid *) Tloc(g, BATcount(g) - 1);
                else if (BATtrevordered(g))
                        maxgrp = * (oid *) Tloc(g, 0);
@@ -814,8 +861,11 @@ BATgroup_internal(BAT **groups, BAT **ex
                maxgrps = GROUPBATINCR;
        bi = bat_iterator(b);
 
-       if (bi.width <= 2)
+       if (bi.width <= 2) {
                maxgrps = (BUN) 1 << (8 * bi.width);
+               if (bi.width == 1 && maxgrp < 256)
+                       maxgrps *= maxgrp;
+       }
        if (extents) {
                en = COLnew(0, TYPE_oid, maxgrps, TRANSIENT);
                if (en == NULL)
@@ -860,7 +910,26 @@ BATgroup_internal(BAT **groups, BAT **ex
                }
        }
 
-       if (subsorted ||
+       if (g == NULL && t == TYPE_bte) {
+               /* byte-sized values, use 256 entry array to keep
+                * track of doled out group ids; note that we can't
+                * possibly have more than 256 groups, so the group id
+                * fits in a uint8_t */
+               GRP_small_values(8, 8, w[p]);
+       } else if (t == TYPE_bte && maxgrp < 256) {
+               /* subgrouping byte-sized values with a limited number
+                * of groups, use 65536 entry array to keep track of
+                * doled out group ids; note that we can't possibly have
+                * more than 65536 groups, so the group id fits in a
+                * uint16_t */
+               GRP_small_values(16, 8, (uint16_t) (w[p] | (grps[p] << 8)));
+       } else if (g == NULL && t == TYPE_sht) {
+               /* short-sized values, use 65536 entry array to keep
+                * track of doled out group ids; note that we can't
+                * possibly have more than 65536 groups, so the group
+                * id fits in a uint16_t */
+               GRP_small_values(16, 16, w[p]);
+       } else if (subsorted ||
            ((BATordered(b) || BATordered_rev(b)) &&
             (g == NULL || BATordered(g) || BATordered_rev(g)))) {
                /* we only need to compare each entry with the previous */
@@ -961,73 +1030,6 @@ BATgroup_internal(BAT **groups, BAT **ex
                }
 
                GDKfree(pgrp);
-       } else if (g == NULL && t == TYPE_bte) {
-               /* byte-sized values, use 256 entry array to keep
-                * track of doled out group ids; note that we can't
-                * possibly have more than 256 groups, so the group id
-                * fits in an uint8_t */
-               uint8_t bgrps[256];
-               const uint8_t *restrict w = (const uint8_t *) bi.base;
-               uint8_t v;
-
-               algomsg = "byte-sized groups -- ";
-               memset(bgrps, 0xFF, sizeof(bgrps));
-               if (histo)
-                       memset(cnts, 0, maxgrps * sizeof(lng));
-               ngrp = 0;
-               gn->tsorted = true;
-               TIMEOUT_LOOP_IDX(r, cnt, timeoffset) {
-                       oid o = canditer_next(&ci);
-                       p = o - b->hseqbase;
-                       if ((v = bgrps[w[p]]) == 0xFF && ngrp < 256) {
-                               bgrps[w[p]] = v = (uint8_t) ngrp++;
-                               maxgrppos = r;
-                               if (extents)
-                                       exts[v] = o;
-                       }
-                       ngrps[r] = v;
-                       if (r > 0 && v < ngrps[r - 1])
-                               gn->tsorted = false;
-                       if (histo)
-                               cnts[v]++;
-               }
-               TIMEOUT_CHECK(timeoffset,
-                             GOTO_LABEL_TIMEOUT_HANDLER(error));
-       } else if (g == NULL && t == TYPE_sht) {
-               /* short-sized values, use 65536 entry array to keep
-                * track of doled out group ids; note that we can't
-                * possibly have more than 65536 groups, so the group
-                * id fits in an uint16_t */
-               uint16_t *restrict sgrps = GDKmalloc(65536 * sizeof(short));
-               const uint16_t *restrict w = (const uint16_t *) bi.base;
-               uint16_t v;
-
-               algomsg = "short-sized groups -- ";
-               if (sgrps == NULL)
-                       goto error1;
-               memset(sgrps, 0xFF, 65536 * sizeof(uint16_t));
-               if (histo)
-                       memset(cnts, 0, maxgrps * sizeof(lng));
-               ngrp = 0;
-               gn->tsorted = true;
-               TIMEOUT_LOOP_IDX(r, cnt, timeoffset) {
-                       oid o = canditer_next(&ci);
-                       p = o - b->hseqbase;
-                       if ((v = sgrps[w[p]]) == 0xFFFF && ngrp < 65536) {
-                               sgrps[w[p]] = v = (uint16_t) ngrp++;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to