On Fri, Feb 19, 2010 at 03:40:10PM +0000, Peter Boncz wrote:
> Update of /cvsroot/monetdb/MonetDB/src/gdk
> In directory sfp-cvsdas-1.v30.ch3.sourceforge.com:/tmp/cvs-serv10221
> 
> Modified Files:
>       Tag: Feb2010
>       gdk_posix.mx gdk_storage.mx 
> Log Message:
> - fix newly introduced bug (BUF_SEQUENTIAL passed to posix_madvise iso 
> MMAP_SEQUENTIAL)
>   most of this was done by Stefan, but the intent of the code was when all 
> users
>   are gone, to give uniform (sequential) advise to the whole heap
> - make sure the length of advise is page aligned (not strictly needed for 
>   Linux, but maybe of other Un*ces)
> - in case of shared vheaps (as done by leftfetchjoin into a string bat), 
> assume that
>   access to the string heap will be random.

shouldn't
        len & ~MT_pagesize()
then rather be
        len & ~(MT_pagesize()-1)

(assuming pages size is usually a power of 2)

?

Stefan


> 
> 
> Index: gdk_storage.mx
> ===================================================================
> RCS file: /cvsroot/monetdb/MonetDB/src/gdk/gdk_storage.mx,v
> retrieving revision 1.149.2.36
> retrieving revision 1.149.2.37
> diff -u -d -r1.149.2.36 -r1.149.2.37
> --- gdk_storage.mx    19 Feb 2010 13:13:38 -0000      1.149.2.36
> +++ gdk_storage.mx    19 Feb 2010 15:40:07 -0000      1.149.2.37
> @@ -707,17 +707,19 @@
>   * Peter Feb2010: I tried to do prefetches further apart, to trigger 
> multiple readahead
>   *                units in parallel, but it does improve performance visibly 
>   */
> -static size_t access_heap(str id, str hp, Heap *h, char* base, size_t sz, 
> int touch, int preload, int advise) {
> +static size_t access_heap(str id, str hp, Heap *h, char* base, size_t sz, 
> int touch, int preload, int adv) {
>       size_t v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 =0, v6 = 0, v7 = 0, 
> page = MT_pagesize();
> +     str advise = 
> (adv==MMAP_WILLNEED)?"WILLNEED":(adv==MMAP_SEQUENTIAL)?"SEQUENTIAL":(adv==MMAP_RANDOM)?"RANDOM":(adv==MMAP_NORMAL)?"NORMAL":NULL;
>       int t = GDKms();
> +     assert(advise);
>       if (h->storage != STORE_MEM && h->size > MT_MMAP_TILE) {
> -             MT_mmap_inform(h->base, h->size, preload, advise, 0);
> +             MT_mmap_inform(h->base, h->size, preload, adv, 0);
>               if (preload > 0) {
> -                     void* alignedbase = (void*) (((size_t) base) & 
> ~(page-1));
> -                     size_t alignedsz = (sz + (page-1)) & ~(page-1);
> -                     int ret = posix_madvise(alignedbase, sz, advise);
> -                     if (ret) THRprintf(GDKerr, "#MT_mmap_inform: 
> posix_madvise(file=%s, base="PTRFMT", len="SZFMT"MB, advice=%d) = %d\n", 
> -                                     h->filename, PTRFMTCAST alignedbase, 
> alignedsz >> 20, advise, errno);
> +                     size_t alignskip = (page - (((size_t) base) & 
> (page-1))) & (page-1);
> +                     size_t alignedsz = (size_t) (((sz < 
> alignskip)?0:((size_t) (sz-alignskip))) & ~(page-1));
> +                     int ret = posix_madvise(base + alignskip, alignedsz, 
> adv);
> +                     if (ret) THRprintf(GDKerr, "#MT_mmap_inform: 
> posix_madvise(file=%s, base="PTRFMT", len="SZFMT"MB, advice=%s) = %d\n", 
> +                                     h->filename, PTRFMTCAST (base + 
> alignskip), alignedsz >> 20, advise, errno);
>               }
>       }
>       if (touch && preload > 0) {
> @@ -731,8 +733,7 @@
>               }
>               for (hi += 7*page; lo <= hi; lo +=page) v0 += *lo;
>       }
> -     IODEBUG THRprintf(GDKout,"#BATpreload(%s->%s,preload=%d,sz=%dMB,%s) = 
> %dms \n", id, hp, preload, (int) (sz>>20), 
> -             
> (advise==MMAP_WILLNEED)?"WILLNEED":(advise==MMAP_SEQUENTIAL)?"SEQUENTIAL":"UNKNOWN",
>  GDKms()-t);
> +     IODEBUG THRprintf(GDKout,"#BATpreload(%s->%s,preload=%d,sz=%dMB,%s) = 
> %dms \n", id, hp, preload, (int) (sz>>20), advise, GDKms()-t);
>       return v0+v1+v2+v3+v4+v5+v6+v7;
>  }
>  
> @@ -743,7 +744,6 @@
>       str id = BATgetId(b);
>       BATiter bi = bat_iterator(b);
>  
> -     
> assert(advise==MMAP_NORMAL||advise==MMAP_RANDOM||advise==MMAP_SEQUENTIAL||advise==MMAP_WILLNEED||advise==MMAP_DONTNEED);
>       if (BATcount(b) == 0) return 0;
>  
>       /* HASH indices (inherent random access). handle first as they *will* 
> be access randomly (one can always hope for locality on the other heaps) */
> @@ -760,30 +760,46 @@
>               gdk_unset_lock(GDKhashLock(ABS(b->batCacheid) & BBP_BATMASK), 
> "BATaccess");
>       }
>  
> -     /* we only touch stuff that is going to be read randomly (WILLNEED). 
> Note varheaps are sequential wrt to the references, or small */
> -     if ( what&USE_HEAD) {
> +     /* vheaps next, as shared vheaps are not seq-correlated needing 
> WILLNEED (use prefetch budget for this first) */
> +     if ( what&USE_HEAD ) {
> +             if (b->H->vheap && b->H->vheap->base) {
> +                     char *lo = BUNhead(bi, BUNfirst(b)), *hi = BUNhead(bi, 
> BUNlast(b)-1);
> +                     int heap_advise = advise;
> +                     if (b->H->vheap->copied) { /* shared string heaps are 
> not (likely) to be sequentially correlated */
> +                             lo = b->H->vheap->base; hi = lo + 
> b->H->vheap->free;
> +                             heap_advise = MADV_WILLNEED;
> +                     }
> +                     budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
> +                     v += access_heap(id, "hheap", b->H->vheap, lo, sz, 
> (advise == BUF_WILLNEED), preload, heap_advise);
> +             }
> +     }
> +     if ( what&USE_TAIL ) {
> +             if (b->T->vheap && b->T->vheap->base) {
> +                     char *lo = BUNtail(bi, BUNfirst(b)), *hi = BUNtail(bi, 
> BUNlast(b)-1);
> +                     int heap_advise = advise;
> +                     if (b->T->vheap->copied) { /* shared string heaps are 
> not (likely) to be sequentially correlated */
> +                             lo = b->T->vheap->base; hi = lo + 
> b->T->vheap->free;
> +                             heap_advise = MADV_WILLNEED;
> +                     }
> +                     budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
> +                     v += access_heap(id, "theap", b->T->vheap, lo, sz, 
> (advise == BUF_WILLNEED), preload, heap_advise);
> +             }
> +     }
> +
> +     /* BUN heaps are last in line for prefetch budget */
> +     if ( what&USE_HEAD ) {
>               if (b->H->heap.base) {
>                       char *lo = BUNhloc(bi, BUNfirst(b)), *hi = BUNhloc(bi, 
> BUNlast(b)-1);
>                       budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
>                       v += access_heap(id, "hbuns", &b->H->heap, lo, sz, 
> (advise == MMAP_WILLNEED), preload, advise);
>               }
> -             if (b->H->vheap && b->H->vheap->base) {
> -                     char *lo = BUNhead(bi, BUNfirst(b)), *hi = BUNhead(bi, 
> BUNlast(b)-1);
> -                     budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
> -                     v += access_heap(id, "hheap", b->H->vheap, lo, sz, 
> (advise == MMAP_WILLNEED), preload, advise);
> -             }
>       }
> -     if ( what&USE_TAIL) {
> +     if ( what&USE_TAIL ) {
>               if (b->T->heap.base) {
>                       char *lo = BUNtloc(bi, BUNfirst(b)), *hi = BUNtloc(bi, 
> BUNlast(b)-1);
>                       budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
>                       v += access_heap(id, "tbuns", &b->T->heap, lo, sz, 
> (advise == MMAP_WILLNEED), preload, advise);
>               }
> -             if (b->T->vheap && b->T->vheap->base) {
> -                     char *lo = BUNtail(bi, BUNfirst(b)), *hi = BUNtail(bi, 
> BUNlast(b)-1);
> -                     budget -= sz = ((hi-lo) > budget)?budget:(hi-lo);
> -                     v += access_heap(id, "theap", b->T->vheap, lo, sz, 
> (advise == MMAP_WILLNEED), preload, advise);
> -             }
>       }
>       return v;
>  }
> 
> Index: gdk_posix.mx
> ===================================================================
> RCS file: /cvsroot/monetdb/MonetDB/src/gdk/gdk_posix.mx,v
> retrieving revision 1.176.2.24
> retrieving revision 1.176.2.25
> diff -u -d -r1.176.2.24 -r1.176.2.25
> --- gdk_posix.mx      19 Feb 2010 13:13:38 -0000      1.176.2.24
> +++ gdk_posix.mx      19 Feb 2010 15:40:07 -0000      1.176.2.25
> @@ -675,7 +675,7 @@
>  #ifdef HAVE_POSIX_FADVISE
>                       if (!do_not_use_posix_fadvise && MT_mmap_tab[victim].fd 
> >= 0) {
>                               /* tell the OS quite clearly that you want to 
> drop this */
> -                             ret = posix_fadvise(MT_mmap_tab[victim].fd, 
> 0LL, MT_mmap_tab[victim].len, POSIX_FADV_DONTNEED);
> +                             ret = posix_fadvise(MT_mmap_tab[victim].fd, 
> 0LL, MT_mmap_tab[victim].len & ~MT_pagesize(), POSIX_FADV_DONTNEED);
>  #ifdef MMAP_DEBUG
>                               stream_printf(GDKerr, "#MT_mmap_del: 
> posix_fadvise(%s,fd=%d,%uMB,POSIX_FADV_DONTNEED) = %d\n", 
> MT_mmap_tab[victim].path, MT_mmap_tab[victim].fd, (unsigned int) 
> (MT_mmap_tab[victim].len >> 20), ret);
>  #endif
> @@ -709,7 +709,7 @@
>               i = MT_mmap_idx(base, len);
>               if (i >= 0) {
>                       if (MT_mmap_tab[i].fd >= 0) {
> -                             ret = posix_fadvise(MT_mmap_tab[i].fd, 0, len, 
> advice);
> +                             ret = posix_fadvise(MT_mmap_tab[i].fd, 0, len & 
> ~MT_pagesize(), advice);
>  #ifdef MMAP_DEBUG
>                               stream_printf(GDKerr, "#MT_fadvise: 
> posix_fadvise(%s,fd=%d,%uMB,%d) = %d\n", MT_mmap_tab[i].path, 
> MT_mmap_tab[i].fd, (unsigned int) (len >> 20), advice, ret);
>  #endif
> @@ -733,7 +733,7 @@
>  {
>       size_t len = MIN((size_t) MT_MMAP_TILE, MT_mmap_tab[i].len - off);
>       /* tell Linux to please stop caching this stuff */
> -     int ret = posix_madvise(MT_mmap_tab[i].base + off, len, 
> POSIX_MADV_DONTNEED);
> +     int ret = posix_madvise(MT_mmap_tab[i].base + off, len & 
> ~MT_pagesize(), POSIX_MADV_DONTNEED);
>  
>       if (err) {
>               stream_printf(err, "#MT_mmap_unload_tile: 
> posix_madvise(%s,off=%uMB,%uMB,fd=%d,POSIX_MADV_DONTNEED) = %d\n",
> @@ -743,7 +743,7 @@
>  #ifdef HAVE_POSIX_FADVISE
>       if (!do_not_use_posix_fadvise) {
>               /* tell the OS quite clearly that you want to drop this */
> -             ret = posix_fadvise(MT_mmap_tab[i].fd, off, len, 
> POSIX_FADV_DONTNEED);
> +             ret = posix_fadvise(MT_mmap_tab[i].fd, off, len & 
> ~MT_pagesize(), POSIX_FADV_DONTNEED);
>               if (err) {
>                       stream_printf(err, "#MT_mmap_unload_tile: 
> posix_fadvise(%s,off=%uMB,%uMB,fd=%d,POSIX_MADV_DONTNEED) = %d\n",
>                                     MT_mmap_tab[i].path, (unsigned int) (off 
> >> 20),
> @@ -908,10 +908,9 @@
>               MT_mmap_tab[i].random += preload * (advise == MMAP_WILLNEED); 
> /* done as a counter to keep track of multiple threads */
>               MT_mmap_tab[i].usecnt += preload; /* active thread count */
>               unload = MT_mmap_tab[i].usecnt == 0;
> +             if (unload) ret = posix_madvise(MT_mmap_tab[i].base, 
> MT_mmap_tab[i].len & ~MT_pagesize(), MMAP_SEQUENTIAL);
>       }
>       (void) pthread_mutex_unlock(&MT_mmap_lock);
> -     if (unload)
> -             ret = posix_madvise(base, len, MMAP_SEQUENTIAL);
>       if (ret) {
>               stream_printf(GDKerr, "#MT_mmap_inform: posix_madvise(file=%s, 
> fd=%d, base="PTRFMT", len="SZFMT"MB, advice=%d) = %d\n",
>                             (i >= 0 ? MT_mmap_tab[i].path : ""), (i >= 0 ? 
> MT_mmap_tab[i].fd : -1),
> @@ -1156,7 +1155,7 @@
>  
>       if (ret != (void *) -1L) {
>               if (hdl->mode & MMAP_ADVISE) {
> -                     (void) MT_madvise(ret, len, hdl->mode & MMAP_ADVISE);
> +                     (void) MT_madvise(ret, len & ~MT_pagesize(), hdl->mode 
> & MMAP_ADVISE);
>               }
>               hdl->fixed = (void *) ((char *) ret + len);
>       }
> @@ -1199,7 +1198,7 @@
>  int
>  MT_madvise(void *p, size_t len, int advise)
>  {
> -     int ret = posix_madvise(p, len, advise);
> +     int ret = posix_madvise(p, len & ~MT_pagesize(), advise);
>  
>  #ifdef MMAP_DEBUG
>       stream_printf(GDKerr, "#posix_madvise(" PTRFMT "," SZFMT ",%d) = %d\n", 
> PTRFMTCAST p, len, advise, ret);
> 
> 
> ------------------------------------------------------------------------------
> Download Intel&#174; Parallel Studio Eval
> Try the new software tools for yourself. Speed compiling, find bugs
> proactively, and fine-tune applications for parallel performance.
> See why Intel Parallel Studio got high marks during beta.
> http://p.sf.net/sfu/intel-sw-dev
> _______________________________________________
> Monetdb-checkins mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/monetdb-checkins
> 

-- 
| Dr. Stefan Manegold | mailto:[email protected] |
| CWI,  P.O.Box 94079 | http://www.cwi.nl/~manegold/  |
| 1090 GB Amsterdam   | Tel.: +31 (20) 592-4212       |
| The Netherlands     | Fax : +31 (20) 592-4199       |

------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
_______________________________________________
Monetdb-developers mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/monetdb-developers

Reply via email to