On Fri, Feb 19, 2010 at 03:40:10PM +0000, Peter Boncz wrote:
> Update of /cvsroot/monetdb/MonetDB/src/gdk
> In directory sfp-cvsdas-1.v30.ch3.sourceforge.com:/tmp/cvs-serv10221
>
> Modified Files:
> Tag: Feb2010
> gdk_posix.mx gdk_storage.mx
> Log Message:
> - fix newly introduced bug (BUF_SEQUENTIAL passed to posix_madvise iso
> MMAP_SEQUENTIAL)
> most of this was done by Stefan, but the intent of the code was when all
> users
> are gone, to give uniform (sequential) advise to the whole heap
> - make sure the length of advise is page aligned (not strictly needed for
> Linux, but maybe of other Un*ces)
> - in case of shared vheaps (as done by leftfetchjoin into a string bat),
> assume that
> access to the string heap will be random.
shouldn't
len & ~MT_pagesize()
then rather be
len & ~(MT_pagesize()-1)
(assuming pages size is usually a power of 2)
?
Stefan
>
>
> Index: gdk_storage.mx
> ===================================================================
> RCS file: /cvsroot/monetdb/MonetDB/src/gdk/gdk_storage.mx,v
> retrieving revision 1.149.2.36
> retrieving revision 1.149.2.37
> diff -u -d -r1.149.2.36 -r1.149.2.37
> --- gdk_storage.mx 19 Feb 2010 13:13:38 -0000 1.149.2.36
> +++ gdk_storage.mx 19 Feb 2010 15:40:07 -0000 1.149.2.37
> @@ -707,17 +707,19 @@
> * Peter Feb2010: I tried to do prefetches further apart, to trigger
> multiple readahead
> * units in parallel, but it does improve performance visibly
> */
> -static size_t access_heap(str id, str hp, Heap *h, char* base, size_t sz,
> int touch, int preload, int advise) {
> +static size_t access_heap(str id, str hp, Heap *h, char* base, size_t sz,
> int touch, int preload, int adv) {
> size_t v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 =0, v6 = 0, v7 = 0,
> page = MT_pagesize();
> + str advise =
> (adv==MMAP_WILLNEED)?"WILLNEED":(adv==MMAP_SEQUENTIAL)?"SEQUENTIAL":(adv==MMAP_RANDOM)?"RANDOM":(adv==MMAP_NORMAL)?"NORMAL":NULL;
> int t = GDKms();
> + assert(advise);
> if (h->storage != STORE_MEM && h->size > MT_MMAP_TILE) {
> - MT_mmap_inform(h->base, h->size, preload, advise, 0);
> + MT_mmap_inform(h->base, h->size, preload, adv, 0);
> if (preload > 0) {
> - void* alignedbase = (void*) (((size_t) base) &
> ~(page-1));
> - size_t alignedsz = (sz + (page-1)) & ~(page-1);
> - int ret = posix_madvise(alignedbase, sz, advise);
> - if (ret) THRprintf(GDKerr, "#MT_mmap_inform:
> posix_madvise(file=%s, base="PTRFMT", len="SZFMT"MB, advice=%d) = %d\n",
> - h->filename, PTRFMTCAST alignedbase,
> alignedsz >> 20, advise, errno);
> + size_t alignskip = (page - (((size_t) base) &
> (page-1))) & (page-1);
> + size_t alignedsz = (size_t) (((sz <
> alignskip)?0:((size_t) (sz-alignskip))) & ~(page-1));
> + int ret = posix_madvise(base + alignskip, alignedsz,
> adv);
> + if (ret) THRprintf(GDKerr, "#MT_mmap_inform:
> posix_madvise(file=%s, base="PTRFMT", len="SZFMT"MB, advice=%s) = %d\n",
> + h->filename, PTRFMTCAST (base +
> alignskip), alignedsz >> 20, advise, errno);
> }
> }
> if (touch && preload > 0) {
> @@ -731,8 +733,7 @@
> }
> for (hi += 7*page; lo <= hi; lo +=page) v0 += *lo;
> }
> - IODEBUG THRprintf(GDKout,"#BATpreload(%s->%s,preload=%d,sz=%dMB,%s) =
> %dms \n", id, hp, preload, (int) (sz>>20),
> -
> (advise==MMAP_WILLNEED)?"WILLNEED":(advise==MMAP_SEQUENTIAL)?"SEQUENTIAL":"UNKNOWN",
> GDKms()-t);
> + IODEBUG THRprintf(GDKout,"#BATpreload(%s->%s,preload=%d,sz=%dMB,%s) =
> %dms \n", id, hp, preload, (int) (sz>>20), advise, GDKms()-t);
> return v0+v1+v2+v3+v4+v5+v6+v7;
> }
>
> @@ -743,7 +744,6 @@
> str id = BATgetId(b);
> BATiter bi = bat_iterator(b);
>
> -
> assert(advise==MMAP_NORMAL||advise==MMAP_RANDOM||advise==MMAP_SEQUENTIAL||advise==MMAP_WILLNEED||advise==MMAP_DONTNEED);
> if (BATcount(b) == 0) return 0;
>
> /* HASH indices (inherent random access). handle first as they *will*
> be access randomly (one can always hope for locality on the other heaps) */
> @@ -760,30 +760,46 @@
> gdk_unset_lock(GDKhashLock(ABS(b->batCacheid) & BBP_BATMASK),
> "BATaccess");
> }
>
> - /* we only touch stuff that is going to be read randomly (WILLNEED).
> Note varheaps are sequential wrt to the references, or small */
> - if ( what&USE_HEAD) {
> + /* vheaps next, as shared vheaps are not seq-correlated needing
> WILLNEED (use prefetch budget for this first) */
> + if ( what&USE_HEAD ) {
> + if (b->H->vheap && b->H->vheap->base) {
> + char *lo = BUNhead(bi, BUNfirst(b)), *hi = BUNhead(bi,
> BUNlast(b)-1);
> + int heap_advise = advise;
> + if (b->H->vheap->copied) { /* shared string heaps are
> not (likely) to be sequentially correlated */
> + lo = b->H->vheap->base; hi = lo +
> b->H->vheap->free;
> + heap_advise = MADV_WILLNEED;
> + }
> + budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
> + v += access_heap(id, "hheap", b->H->vheap, lo, sz,
> (advise == BUF_WILLNEED), preload, heap_advise);
> + }
> + }
> + if ( what&USE_TAIL ) {
> + if (b->T->vheap && b->T->vheap->base) {
> + char *lo = BUNtail(bi, BUNfirst(b)), *hi = BUNtail(bi,
> BUNlast(b)-1);
> + int heap_advise = advise;
> + if (b->T->vheap->copied) { /* shared string heaps are
> not (likely) to be sequentially correlated */
> + lo = b->T->vheap->base; hi = lo +
> b->T->vheap->free;
> + heap_advise = MADV_WILLNEED;
> + }
> + budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
> + v += access_heap(id, "theap", b->T->vheap, lo, sz,
> (advise == BUF_WILLNEED), preload, heap_advise);
> + }
> + }
> +
> + /* BUN heaps are last in line for prefetch budget */
> + if ( what&USE_HEAD ) {
> if (b->H->heap.base) {
> char *lo = BUNhloc(bi, BUNfirst(b)), *hi = BUNhloc(bi,
> BUNlast(b)-1);
> budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
> v += access_heap(id, "hbuns", &b->H->heap, lo, sz,
> (advise == MMAP_WILLNEED), preload, advise);
> }
> - if (b->H->vheap && b->H->vheap->base) {
> - char *lo = BUNhead(bi, BUNfirst(b)), *hi = BUNhead(bi,
> BUNlast(b)-1);
> - budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
> - v += access_heap(id, "hheap", b->H->vheap, lo, sz,
> (advise == MMAP_WILLNEED), preload, advise);
> - }
> }
> - if ( what&USE_TAIL) {
> + if ( what&USE_TAIL ) {
> if (b->T->heap.base) {
> char *lo = BUNtloc(bi, BUNfirst(b)), *hi = BUNtloc(bi,
> BUNlast(b)-1);
> budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
> v += access_heap(id, "tbuns", &b->T->heap, lo, sz,
> (advise == MMAP_WILLNEED), preload, advise);
> }
> - if (b->T->vheap && b->T->vheap->base) {
> - char *lo = BUNtail(bi, BUNfirst(b)), *hi = BUNtail(bi,
> BUNlast(b)-1);
> - budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
> - v += access_heap(id, "theap", b->T->vheap, lo, sz,
> (advise == MMAP_WILLNEED), preload, advise);
> - }
> }
> return v;
> }
>
> Index: gdk_posix.mx
> ===================================================================
> RCS file: /cvsroot/monetdb/MonetDB/src/gdk/gdk_posix.mx,v
> retrieving revision 1.176.2.24
> retrieving revision 1.176.2.25
> diff -u -d -r1.176.2.24 -r1.176.2.25
> --- gdk_posix.mx 19 Feb 2010 13:13:38 -0000 1.176.2.24
> +++ gdk_posix.mx 19 Feb 2010 15:40:07 -0000 1.176.2.25
> @@ -675,7 +675,7 @@
> #ifdef HAVE_POSIX_FADVISE
> if (!do_not_use_posix_fadvise && MT_mmap_tab[victim].fd
> >= 0) {
> /* tell the OS quite clearly that you want to
> drop this */
> - ret = posix_fadvise(MT_mmap_tab[victim].fd,
> 0LL, MT_mmap_tab[victim].len, POSIX_FADV_DONTNEED);
> + ret = posix_fadvise(MT_mmap_tab[victim].fd,
> 0LL, MT_mmap_tab[victim].len & ~MT_pagesize(), POSIX_FADV_DONTNEED);
> #ifdef MMAP_DEBUG
> stream_printf(GDKerr, "#MT_mmap_del:
> posix_fadvise(%s,fd=%d,%uMB,POSIX_FADV_DONTNEED) = %d\n",
> MT_mmap_tab[victim].path, MT_mmap_tab[victim].fd, (unsigned int)
> (MT_mmap_tab[victim].len >> 20), ret);
> #endif
> @@ -709,7 +709,7 @@
> i = MT_mmap_idx(base, len);
> if (i >= 0) {
> if (MT_mmap_tab[i].fd >= 0) {
> - ret = posix_fadvise(MT_mmap_tab[i].fd, 0, len,
> advice);
> + ret = posix_fadvise(MT_mmap_tab[i].fd, 0, len &
> ~MT_pagesize(), advice);
> #ifdef MMAP_DEBUG
> stream_printf(GDKerr, "#MT_fadvise:
> posix_fadvise(%s,fd=%d,%uMB,%d) = %d\n", MT_mmap_tab[i].path,
> MT_mmap_tab[i].fd, (unsigned int) (len >> 20), advice, ret);
> #endif
> @@ -733,7 +733,7 @@
> {
> size_t len = MIN((size_t) MT_MMAP_TILE, MT_mmap_tab[i].len - off);
> /* tell Linux to please stop caching this stuff */
> - int ret = posix_madvise(MT_mmap_tab[i].base + off, len,
> POSIX_MADV_DONTNEED);
> + int ret = posix_madvise(MT_mmap_tab[i].base + off, len &
> ~MT_pagesize(), POSIX_MADV_DONTNEED);
>
> if (err) {
> stream_printf(err, "#MT_mmap_unload_tile:
> posix_madvise(%s,off=%uMB,%uMB,fd=%d,POSIX_MADV_DONTNEED) = %d\n",
> @@ -743,7 +743,7 @@
> #ifdef HAVE_POSIX_FADVISE
> if (!do_not_use_posix_fadvise) {
> /* tell the OS quite clearly that you want to drop this */
> - ret = posix_fadvise(MT_mmap_tab[i].fd, off, len,
> POSIX_FADV_DONTNEED);
> + ret = posix_fadvise(MT_mmap_tab[i].fd, off, len &
> ~MT_pagesize(), POSIX_FADV_DONTNEED);
> if (err) {
> stream_printf(err, "#MT_mmap_unload_tile:
> posix_fadvise(%s,off=%uMB,%uMB,fd=%d,POSIX_MADV_DONTNEED) = %d\n",
> MT_mmap_tab[i].path, (unsigned int) (off
> >> 20),
> @@ -908,10 +908,9 @@
> MT_mmap_tab[i].random += preload * (advise == MMAP_WILLNEED);
> /* done as a counter to keep track of multiple threads */
> MT_mmap_tab[i].usecnt += preload; /* active thread count */
> unload = MT_mmap_tab[i].usecnt == 0;
> + if (unload) ret = posix_madvise(MT_mmap_tab[i].base,
> MT_mmap_tab[i].len & ~MT_pagesize(), MMAP_SEQUENTIAL);
> }
> (void) pthread_mutex_unlock(&MT_mmap_lock);
> - if (unload)
> - ret = posix_madvise(base, len, MMAP_SEQUENTIAL);
> if (ret) {
> stream_printf(GDKerr, "#MT_mmap_inform: posix_madvise(file=%s,
> fd=%d, base="PTRFMT", len="SZFMT"MB, advice=%d) = %d\n",
> (i >= 0 ? MT_mmap_tab[i].path : ""), (i >= 0 ?
> MT_mmap_tab[i].fd : -1),
> @@ -1156,7 +1155,7 @@
>
> if (ret != (void *) -1L) {
> if (hdl->mode & MMAP_ADVISE) {
> - (void) MT_madvise(ret, len, hdl->mode & MMAP_ADVISE);
> + (void) MT_madvise(ret, len & ~MT_pagesize(), hdl->mode
> & MMAP_ADVISE);
> }
> hdl->fixed = (void *) ((char *) ret + len);
> }
> @@ -1199,7 +1198,7 @@
> int
> MT_madvise(void *p, size_t len, int advise)
> {
> - int ret = posix_madvise(p, len, advise);
> + int ret = posix_madvise(p, len & ~MT_pagesize(), advise);
>
> #ifdef MMAP_DEBUG
> stream_printf(GDKerr, "#posix_madvise(" PTRFMT "," SZFMT ",%d) = %d\n",
> PTRFMTCAST p, len, advise, ret);
>
>
> ------------------------------------------------------------------------------
> Download Intel® Parallel Studio Eval
> Try the new software tools for yourself. Speed compiling, find bugs
> proactively, and fine-tune applications for parallel performance.
> See why Intel Parallel Studio got high marks during beta.
> http://p.sf.net/sfu/intel-sw-dev
> _______________________________________________
> Monetdb-checkins mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/monetdb-checkins
>
--
| Dr. Stefan Manegold | mailto:[email protected] |
| CWI, P.O.Box 94079 | http://www.cwi.nl/~manegold/ |
| 1090 GB Amsterdam | Tel.: +31 (20) 592-4212 |
| The Netherlands | Fax : +31 (20) 592-4199 |
------------------------------------------------------------------------------
Download Intel® Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
_______________________________________________
Monetdb-developers mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-developers