2010/12/15 Jeff Johnson <n3...@mac.com>:
>
> On Dec 14, 2010, at 9:51 PM, Jeff Johnson wrote:
>
>>
>> Download. uncompress. use for file dependencies.
>>
>> I will take wagers on how much smaller the encoding is as
>> soon as you tell me what you choose for {n,p}.
>>
>
> There's an obvious generalization here for primary.xml
> data as well as for all version-less dependencies like
>        Requires: libc.so.6
> and all the variants (see rpm -q --provides glibc spewage).
>
> All of that crap can be collapsed into a single per-package Bloom filter
> that also includes not only all paths contained within a package,
> but also all version-less Provides.
I just played around with rpmbf a bit and tried making a Bloom filter
of all the files in a hdlist, and I wonder if I might've gotten something
wrong: the size of the dump I end up with is pretty much the same
as the size of the compressed files.xml.lzma:

[peroyv...@lappis samples]$ ./hdlist-bf hdlist-main
n: 1021318 m: 29368178 k: 19
size: 3671022

So I guess there's something I'm not really fully grasping here...

See code attached...

--
Regards,
Per Øvind
#define _RPMGI_INTERNAL
#define _RPMBF_INTERNAL
/* avoid warning */
#define	xrealloc(ptr, size) realloc(ptr, size) 

#include <stdint.h>
#include <stdio.h>
#include <rpmio.h>
#include <rpmtag.h>
#include <rpmgi.h>
#include <rpmbf.h>

/*
 * Build one Bloom filter over every RPMTAG_FILEPATHS entry found in an
 * hdlist and dump the filter's raw bit array to "bloom.dump".
 *
 * Usage: hdlist-bf <hdlist>
 *
 * The hdlist is read twice:
 *   pass 1 counts the paths (n) so that rpmbfParams() can fill in m and k
 *          from n and e (e is presumably the target false-positive rate --
 *          confirm against rpmbf.h);
 *   pass 2 adds every path to the filter.
 *
 * Prints n, m, k and the number of bytes written.
 * Returns 0 on success, 1 on a usage or I/O error.
 */
int main(int argc, char *argv[]) {
    FD_t fd;
    rpmts ts = NULL;
    rpmgi gi = NULL;
    rpmRC rc = RPMRC_NOTFOUND;
    rpmbf bf = NULL;
    size_t n = 0;
    static double e = 1.0e-6;
    size_t m = 0;
    size_t k = 0;
    size_t nbytes;
    HE_t he = (HE_t)memset(alloca(sizeof(*he)), 0, sizeof(*he));

    /* argv[1] is used as the hdlist path below; refuse to run without it. */
    if (argc < 2) {
        fprintf(stderr, "usage: %s <hdlist>\n", argc > 0 ? argv[0] : "hdlist-bf");
        return 1;
    }

    ts = rpmtsCreate();
    rpmtsSetRootDir(ts, NULL);
    gi = rpmgiNew(ts, RPMDBI_HDLIST, NULL, 0);

    rpmtsSetVSFlags(ts, _RPMVSF_NOSIGNATURES | RPMVSF_NOHDRCHK | _RPMVSF_NOPAYLOAD | _RPMVSF_NOHEADER);

    /* Pass 1: count the file paths so the filter can be sized. */
    gi->active = 1;
    gi->fd = Fopen(argv[1], "r.fdio");
    if (gi->fd == NULL) {
        fprintf(stderr, "%s: cannot open %s for reading\n", argv[0], argv[1]);
        return 1;
    }
    while ((rc = rpmgiNext(gi)) == RPMRC_OK) {
        he->tag = RPMTAG_FILEPATHS;
        if (headerGet(gi->h, he, 0)) {
            n += he->c;
        }
    }

    rpmbfParams(n, e, &m, &k);

    bf = rpmbfNew(m, k, 0);

    /* Pass 2: re-open the hdlist and add every path to the filter. */
    gi->active = 1;
    gi->fd = Fopen(argv[1], "r.fdio");
    if (gi->fd == NULL) {
        fprintf(stderr, "%s: cannot re-open %s for reading\n", argv[0], argv[1]);
        return 1;
    }
    while ((rc = rpmgiNext(gi)) == RPMRC_OK) {
        he->tag = RPMTAG_FILEPATHS;
        if (headerGet(gi->h, he, 0)) {
            for (he->ix = 0; he->ix < (int) he->c; he->ix++)
                rpmbfAdd(bf, he->p.argv[he->ix], 0);
        }
    }

    /*
     * Round the bit count up to whole bytes: m/8 would silently drop the
     * last partial byte whenever m is not a multiple of 8.
     * NOTE(review): assumes bf->bits holds at least ceil(m/8) bytes --
     * confirm against the allocation in rpmbfNew().
     */
    nbytes = (bf->m + 7) / 8;

    fd = Fopen("bloom.dump", "w.fdio");
    if (fd == NULL) {
        fprintf(stderr, "%s: cannot open bloom.dump for writing\n", argv[0]);
        return 1;
    }
    /* With an item size of 1 a complete write must report exactly nbytes. */
    if (Fwrite(bf->bits, 1, nbytes, fd) != nbytes) {
        fprintf(stderr, "%s: short write to bloom.dump\n", argv[0]);
        Fclose(fd);
        return 1;
    }
    Fclose(fd);

    printf("n: %zu m: %zu k: %zu\nsize: %zu\n", n, m, k, nbytes);

    return 0;
}

Reply via email to