Changeset: b62bd7460530 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b62bd7460530
Modified Files:
        gdk/gdk.h
        gdk/gdk_imprints.c
Branch: bloomfilters
Log Message:

First take on implementing a bloom filter for a column.


diffs (134 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -2196,6 +2196,8 @@ gdk_export void IMPSdestroy(BAT *b);
 gdk_export BAT *BATimprints(BAT *b);
 gdk_export lng IMPSimprintsize(BAT *b);
 
+gdk_export BAT *BATbloom(BAT *b);
+
 /*
  * @- Multilevel Storage Modes
  *
diff --git a/gdk/gdk_imprints.c b/gdk/gdk_imprints.c
--- a/gdk/gdk_imprints.c
+++ b/gdk/gdk_imprints.c
@@ -902,3 +902,118 @@ do {                                                      
                \
        free(s);
 }
 #endif
+
+/* round hashing */
+
+#define smpl_xor_rng(R,X) {\
+R = X; \
+R ^= (R<<13); \
+R ^= (R>>17); \
+R ^= (R<<5); \
+}
+
+#define hash_init(S,X,Y,Z) {\
+smpl_xor_rng(X,S); \
+smpl_xor_rng(Y,X); \
+smpl_xor_rng(Z,Y); \
+}
+
+#define next_hash(N,X,Y,Z) {\
+N = (X^(X<<3))^(Y^(Y>>19))^(Z^(Z<<6)); \
+X = Y; \
+Y = Z; \
+Z = N; \
+}
+
+#define hash_mod(V,MOD) ((V) % (MOD))
+
+BAT *
+BATbloom(BAT *b) {
+       BAT *bn;
+       BUN cnt;
+       BUN mn;
+       BUN p;
+       bit *o;
+
+       assert(BAThdense(b)); /* assert void head */
+
+       switch (ATOMstorage(b->T->type)) {
+       case TYPE_bte:
+       case TYPE_sht:
+       case TYPE_int:
+       case TYPE_lng:
+       case TYPE_flt:
+       case TYPE_dbl:
+               break;
+       default: /* type not supported */
+               GDKerror("#BATbloom: col type not "
+                        "suitable for bloom filters.\n");
+               return b; /* do nothing */
+       }
+
+       BATcheck(b, "BATblooms");
+
+       cnt = BATcount(b);
+       mn = 4 * cnt; /* make it power of 2 for faster modulo */
+
+       bn = BATnew(TYPE_void, TYPE_bit, mn);
+       if (bn == NULL) {
+               GDKerror("#BATbloom: memory allocation error");
+               return NULL;
+       }
+
+       o = (bit *) Tloc(bn, BUNfirst(bn));
+       for (p = 0; (o[p] = 0) && (p < mn); p++);
+
+#define BLOOM_BUILD(TYPE)                                              \
+do {                                                                           
        \
+       oid key,hv,x,y,z; /* for hashing */                     \
+       TYPE *ob = (TYPE *)Tloc(b, BUNfirst(b));        \
+       for (p = 0; p < cnt; p++) {                                     \
+               key = (oid) ob[p];                                              
\
+               hash_init(key, x,y,z);                                  \
+               next_hash(hv, x,y,z);                                   \
+               o[hash_mod(hv,mn)] = 1;                                 \
+               next_hash(hv, x,y,z);                                   \
+               o[hash_mod(hv,mn)] = 1;                                 \
+               next_hash(hv, x,y,z);                                   \
+               o[hash_mod(hv,mn)] = 1;                                 \
+       }                                                                       
                \
+} while (0)
+               switch (ATOMstorage(b->T->type)) {
+               case TYPE_bte:
+                       BLOOM_BUILD(bte);
+                       break;
+               case TYPE_sht:
+                       BLOOM_BUILD(sht);
+                       break;
+               case TYPE_int:
+                       BLOOM_BUILD(int);
+                       break;
+               case TYPE_lng:
+                       BLOOM_BUILD(lng);
+                       break;
+               case TYPE_flt:
+                       BLOOM_BUILD(flt);
+                       break;
+               case TYPE_dbl:
+                       BLOOM_BUILD(dbl);
+                       break;
+               default:
+                       /* should never reach here */
+                       assert(0);
+               }
+
+               /* property management */
+               BATsetcount(bn, mn);
+               bn->trevsorted = 0;
+               bn->tsorted = 0;
+               bn->tkey = 0;
+               bn->tdense = 0;
+               bn->hdense = 1;
+               bn->hseqbase = 0;
+               bn->hkey = 1;
+               bn->hrevsorted = bn->batCount <= 1;
+
+               return bn;
+}
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to