> [...] change SVal to 64-bit at some point (I vote for it!), 

I already did that in Nov 07, but did not commit due to the increase
in run time and space usage.  The resulting patch is attached.
It allows up to 24 bits for lock- and thread-set IDs.  I can't
remember now why I limited it to 24 bits -- could go up to about
30 bits.

Really, 64 bits for an SVal is too much, but 32 bits is not enough.
Something like 48 bits would be a good compromise, but there is
no sensible way to do that in C.  Oh well.

The patch also changes CacheLine to have a dirty bit in an attempt
to reduce the performance overhead from writing back cache lines.
That helps, although it also adds to the complexity and overhead
of verifying that the shadow memory system is functioning
correctly.

J
Index: helgrind/hg_main.c
===================================================================
--- helgrind/hg_main.c	(revision 7350)
+++ helgrind/hg_main.c	(working copy)
@@ -90,7 +90,7 @@
 // shadow_mem_make_NoAccess: 29156 SMs, 1728 scanned
 // happens_before_wrk: 1000
 // ev__post_thread_join: 3360 SMs, 29 scanned, 252 re-Excls
-#define SHOW_EXPENSIVE_STUFF 0
+#define SHOW_EXPENSIVE_STUFF 1
 
 // 0 for silent, 1 for some stuff, 2 for lots of stuff
 #define SHOW_EVENTS 0
@@ -113,7 +113,6 @@
 #if 0
 #  define SCE_CACHELINE 1  /* do sanity-check CacheLine stuff */
 #  define inline __attribute__((noinline))
-   /* probably want to ditch -fomit-frame-pointer too */
 #else
 #  define SCE_CACHELINE 0   /* don't sanity-check CacheLine stuff */
 #endif
@@ -239,7 +238,7 @@
 /*----------------------------------------------------------------*/
 
 /* Shadow values. */
-typedef  UInt  SVal;
+typedef  ULong  SVal;
 
 
 /* These are handles for thread segments.  CONSTRAINTS: Must be small
@@ -357,14 +356,15 @@
 
 /* ------ CacheLine ------ */
 
-#define N_LINE_BITS      5 /* must be >= 3 */
+#define N_LINE_BITS      6 /* must be >= 3 */
 #define N_LINE_ARANGE    (1 << N_LINE_BITS)
 #define N_LINE_TREES     (N_LINE_ARANGE >> 3)
 
 typedef
    struct {
+      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
       UShort descrs[N_LINE_TREES];
-      SVal   svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
+      Bool   dirty;
    }
    CacheLine;
 
@@ -422,7 +422,7 @@
    Each SecMap must hold a power-of-2 number of CacheLines.  Hence
    N_SECMAP_BITS must >= N_LINE_BITS.
 */
-#define N_SECMAP_BITS   13
+#define N_SECMAP_BITS   12
 #define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
 
 // # CacheLines held by a SecMap
@@ -484,7 +484,7 @@
       if (itr->word_no == N_LINE_ARANGE)
          itr->word_no = 0;
    } else {
-      tl_assert(itr->word_no >= 0 && itr->word_no <= 3);
+      tl_assert(itr->word_no >= 0 && itr->word_no < 4);
       tl_assert(lineZ->dict[itr->word_no] != 0);
       *pVal = &lineZ->dict[itr->word_no];
       itr->word_no++;
@@ -884,26 +884,29 @@
 
 /* Shadow value encodings:
 
-   11 WordSetID:TSID_BITS WordSetID:LSID_BITS  ShM  thread-set lock-set
-   10 WordSetID:TSID_BITS WordSetID:LSID_BITS  ShR  thread-set lock-set
-   01 TSegmentID:30                            Excl thread-segment
-   00 0--(20)--0 10 0000 0000                  New
-   00 0--(20)--0 01 0000 0000                  NoAccess
-   00 0--(20)--0 00 0000 0000                  Invalid
+   11 00 0000 WordSetID:24  0000 0000 WordSetID:24   ShM thread-set lock-set
+   10 00 0000 WordSetID:24  0000 0000 WordSetID:24   ShR thread-set lock-set
+   01 0--(30)--0            0000 0000 TSegmentID:24  Excl thread-segment
+   00 0--(30)--0            0--(22)--0 10 0000 0000  New
+   00 0--(30)--0            0--(22)--0 01 0000 0000  NoAccess
+   00 0--(30)--0            0--(22)--0 00 0000 0000  Invalid
 
-   TSID_BITS + LSID_BITS must equal 30.
    The elements in thread sets are Thread*, casted to Word.
    The elements in lock sets are Lock*, casted to Word.
 */
 
-#define N_LSID_BITS  17
-#define N_LSID_MASK  ((1 << (N_LSID_BITS)) - 1)
-#define N_LSID_SHIFT 0
+#define N_LSID_BITS   24 /* do not change this */
+#define N_LSID_MASK   ((1UL << (N_LSID_BITS)) - 1)
+#define N_LSID_SHIFT  0
 
-#define N_TSID_BITS  (30 - (N_LSID_BITS))
-#define N_TSID_MASK  ((1 << (N_TSID_BITS)) - 1)
-#define N_TSID_SHIFT (N_LSID_BITS)
+#define N_TSID_BITS   24 /* do not change this */
+#define N_TSID_MASK   ((1 << (N_TSID_BITS)) - 1)
+#define N_TSID_SHIFT  32
 
+#define N_SEGID_BITS  24 /* do not change this */
+#define N_SEGID_MASK  ((1UL << (N_SEGID_BITS)) - 1)
+#define N_SEGID_SHIFT 0
+
 static inline Bool is_sane_WordSetID_LSet ( WordSetID wset ) {
    return wset >= 0 && wset <= N_LSID_MASK;
 }
@@ -911,7 +914,6 @@
    return wset >= 0 && wset <= N_TSID_MASK;
 }
 
-
 __attribute__((noinline))
 __attribute__((noreturn))
 static void mk_SHVAL_fail ( WordSetID tset, WordSetID lset, HChar* who ) {
@@ -929,8 +931,9 @@
 static inline SVal mk_SHVAL_ShM ( WordSetID tset, WordSetID lset ) {
    if (LIKELY(is_sane_WordSetID_TSet(tset) 
               && is_sane_WordSetID_LSet(lset))) {
-      return (SVal)( (3<<30) | (tset << N_TSID_SHIFT) 
-                             | (lset << N_LSID_SHIFT));
+      return (((SVal)3) << 62)
+             | (((SVal)tset) << N_TSID_SHIFT)
+             | (((SVal)lset) << N_LSID_SHIFT);
    } else {
       mk_SHVAL_fail(tset, lset, "mk_SHVAL_ShM");
    }
@@ -938,31 +941,32 @@
 static inline SVal mk_SHVAL_ShR ( WordSetID tset, WordSetID lset ) {
    if (LIKELY(is_sane_WordSetID_TSet(tset) 
               && is_sane_WordSetID_LSet(lset))) {
-      return (SVal)( (2<<30) | (tset << N_TSID_SHIFT) 
-                             | (lset << N_LSID_SHIFT) );
+      return (((SVal)2) << 62)
+             | (((SVal)tset) << N_TSID_SHIFT)
+             | (((SVal)lset) << N_LSID_SHIFT);
    } else {
       mk_SHVAL_fail(tset, lset, "mk_SHVAL_ShR");
    }
 }
 static inline SVal mk_SHVAL_Excl ( SegmentID tseg ) {
    tl_assert(is_sane_SegmentID(tseg));
-   return (SVal)( (1<<30) | tseg );
+   return (((SVal)1) << 62) | (((SVal)tseg) << N_SEGID_SHIFT);
 }
-#define SHVAL_New      ((SVal)(2<<8))
-#define SHVAL_NoAccess ((SVal)(1<<8))
-#define SHVAL_Invalid  ((SVal)(0<<8))
+#define SHVAL_New      (((SVal)2)<<8)
+#define SHVAL_NoAccess (((SVal)1)<<8)
+#define SHVAL_Invalid  (((SVal)0)<<8)
 
 static inline Bool is_SHVAL_ShM ( SVal w32 ) { 
-   return (w32 >> 30) == 3;
+   return (w32 >> 62) == 3;
 }
 static inline Bool is_SHVAL_ShR ( SVal w32 ) {
-   return (w32 >> 30) == 2;
+   return (w32 >> 62) == 2;
 }
 static inline Bool is_SHVAL_Sh ( SVal w32 ) {
-   return (w32 >> 31) == 1;
+   return (w32 >> 63) == 1;
 }
 static inline Bool is_SHVAL_Excl ( SVal w32 ) {
-   return (w32 >> 30) == 1; 
+   return (w32 >> 62) == 1; 
 }
 static inline Bool is_SHVAL_New ( SVal w32 ) {
    return w32 == SHVAL_New;
@@ -977,7 +981,7 @@
 
 static inline SegmentID un_SHVAL_Excl ( SVal w32 ) {
    tl_assert(is_SHVAL_Excl(w32));
-   return w32 & ~(3<<30);
+   return w32 & ~(((SVal)3)<<62);
 }
 static inline WordSetID un_SHVAL_ShR_tset ( SVal w32 ) {
    tl_assert(is_SHVAL_ShR(w32));
@@ -3492,6 +3496,7 @@
       VG_(printf)("pp_CacheLine(NULL)\n");
       return;
    }
+   VG_(printf)("   dirty: %d\n", (Int)cl->dirty);
    for (i = 0; i < N_LINE_TREES; i++) 
       VG_(printf)("   descr: %04lx\n", (UWord)cl->descrs[i]);
    for (i = 0; i < N_LINE_ARANGE; i++) 
@@ -3685,6 +3690,7 @@
    Word tno, cloff;
 
    if (!cl) goto bad;
+   if (cl->dirty != False && cl->dirty != True) goto bad;
 
    for (tno = 0, cloff = 0;  tno < N_LINE_TREES;  tno++, cloff += 8) {
       UShort descr = cl->descrs[tno];
@@ -3866,6 +3872,10 @@
    if (!is_valid_scache_tag(tag))
       return;
 
+   /* Or there may be no point in writing it. */
+   if (!cl->dirty)
+      return;
+
    /* Where are we going to put it? */
    sm         = NULL;
    lineZ      = NULL;
@@ -3931,6 +3941,7 @@
    if (anyShared)
       sm->mbHasShared = True;
 
+   cl->dirty = False;
    /* mb_tidy_one_cacheline(); */
 }
 
@@ -3974,7 +3985,7 @@
       for (i = 0; i < N_LINE_ARANGE; i++) {
          SVal sv;
          UWord ix = read_twobit_array( lineZ->ix2s, i );
-         tl_assert(ix >= 0 && ix <= 3);
+         tl_assert(ix >= 0 && ix < 4);
          sv = lineZ->dict[ix];
          tl_assert(sv != 0);
          cl->svals[i] = sv;
@@ -3982,6 +3993,7 @@
       stats__cache_Z_fetches++;
    }
    normalise_CacheLine( cl );
+   cl->dirty = False;
 }
 
 static void shmem__invalidate_scache ( void ) {
@@ -3990,6 +4002,7 @@
    tl_assert(!is_valid_scache_tag(1));
    for (wix = 0; wix < N_WAY_NENT; wix++) {
       cache_shmem.tags0[wix] = 1/*INVALID*/;
+      cache_shmem.lyns0[wix].dirty = False;
    }
    stats__cache_invals++;
 }
@@ -4277,7 +4290,10 @@
    }
    svOld = cl->svals[cloff];
    svNew = msm__handle_read( thr_acc, a, svOld, 1 );
-   cl->svals[cloff] = svNew;
+   if (svNew != svOld) {
+      cl->svals[cloff] = svNew;
+      cl->dirty        = True;
+   }
 }
 static void shadow_mem_read16 ( Thread* thr_acc, Addr a, SVal uuOpaque ) {
    CacheLine* cl; 
@@ -4303,7 +4319,10 @@
    }
    svOld = cl->svals[cloff];
    svNew = msm__handle_read( thr_acc, a, svOld, 2 );
-   cl->svals[cloff] = svNew;
+   if (svNew != svOld) {
+      cl->svals[cloff] = svNew;
+      cl->dirty        = True;
+   }
    return;
   slowcase: /* misaligned, or must go further down the tree */
    stats__cline_16to8splits++;
@@ -4335,7 +4354,10 @@
    }
    svOld = cl->svals[cloff];
    svNew = msm__handle_read( thr_acc, a, svOld, 4 );
-   cl->svals[cloff] = svNew;
+   if (svNew != svOld) {
+      cl->svals[cloff] = svNew;
+      cl->dirty        = True;
+   }
    return;
   slowcase: /* misaligned, or must go further down the tree */
    stats__cline_32to16splits++;
@@ -4346,6 +4368,7 @@
 static void shadow_mem_read32 ( Thread* thr_acc, Addr a, SVal uuOpaque ) {
    CacheLine* cl; 
    UWord      cloff, tno, toff;
+   SVal       svOld, svNew;
    UShort     descr;
    stats__cline_read32s++;
    if (UNLIKELY(!aligned32(a))) goto slowcase;
@@ -4355,8 +4378,11 @@
    toff  = get_tree_offset(a); /* == 0 or 4 */
    descr = cl->descrs[tno];
    if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) goto slowcase;
-   { SVal* p = &cl->svals[cloff];
-     *p = msm__handle_read( thr_acc, a, *p, 4 );
+   svOld = cl->svals[cloff];
+   svNew = msm__handle_read( thr_acc, a, svOld, 4 );
+   if (svNew != svOld) {
+      cl->svals[cloff] = svNew;
+      cl->dirty        = True;
    }
    return;
   slowcase: /* misaligned, or not at this level in the tree */
@@ -4381,7 +4407,10 @@
    }
    svOld = cl->svals[cloff];
    svNew = msm__handle_read( thr_acc, a, svOld, 8 );
-   cl->svals[cloff] = svNew;
+   if (svNew != svOld) {
+      cl->svals[cloff] = svNew;
+      cl->dirty        = True;
+   }
    return;
   slowcase: /* misaligned, or must go further down the tree */
    stats__cline_64to32splits++;
@@ -4408,7 +4437,10 @@
    }
    svOld = cl->svals[cloff];
    svNew = msm__handle_write( thr_acc, a, svOld, 1 );
-   cl->svals[cloff] = svNew;
+   if (svNew != svOld) {
+      cl->svals[cloff] = svNew;
+      cl->dirty        = True;
+   }
 }
 static void shadow_mem_write16 ( Thread* thr_acc, Addr a, SVal uuOpaque ) {
    CacheLine* cl; 
@@ -4434,7 +4466,10 @@
    }
    svOld = cl->svals[cloff];
    svNew = msm__handle_write( thr_acc, a, svOld, 2 );
-   cl->svals[cloff] = svNew;
+   if (svNew != svOld) {
+      cl->svals[cloff] = svNew;
+      cl->dirty        = True;
+   }
    return;
   slowcase: /* misaligned, or must go further down the tree */
    stats__cline_16to8splits++;
@@ -4466,7 +4501,10 @@
    }
    svOld = cl->svals[cloff];
    svNew = msm__handle_write( thr_acc, a, svOld, 4 );
-   cl->svals[cloff] = svNew;
+   if (svNew != svOld) {
+      cl->svals[cloff] = svNew;
+      cl->dirty        = True;
+   }
    return;
   slowcase: /* misaligned, or must go further down the tree */
    stats__cline_32to16splits++;
@@ -4477,6 +4515,7 @@
 static void shadow_mem_write32 ( Thread* thr_acc, Addr a, SVal uuOpaque ) {
    CacheLine* cl; 
    UWord      cloff, tno, toff;
+   SVal       svOld, svNew;
    UShort     descr;
    stats__cline_write32s++;
    if (UNLIKELY(!aligned32(a))) goto slowcase;
@@ -4486,8 +4525,11 @@
    toff  = get_tree_offset(a); /* == 0 or 4 */
    descr = cl->descrs[tno];
    if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) goto slowcase;
-   { SVal* p = &cl->svals[cloff];
-     *p = msm__handle_write( thr_acc, a, *p, 4 );
+   svOld = cl->svals[cloff];
+   svNew = msm__handle_write( thr_acc, a, svOld, 4 );
+   if (svNew != svOld) {
+      cl->svals[cloff] = svNew;
+      cl->dirty        = True;
    }
    return;
   slowcase: /* misaligned, or must go further down the tree */
@@ -4512,7 +4554,10 @@
    }
    svOld = cl->svals[cloff];
    svNew = msm__handle_write( thr_acc, a, svOld, 8 );
-   cl->svals[cloff] = svNew;
+   if (svNew != svOld) {
+      cl->svals[cloff] = svNew;
+      cl->dirty        = True;
+   }
    return;
   slowcase: /* misaligned, or must go further down the tree */
    stats__cline_64to32splits++;
@@ -4537,6 +4582,7 @@
          tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
    }
    cl->svals[cloff] = svNew;
+   cl->dirty = True;
 }
 static void shadow_mem_set16 ( Thread* uu_thr_acc, Addr a, SVal svNew ) {
    CacheLine* cl; 
@@ -4567,6 +4613,7 @@
    }
    cl->svals[cloff + 0] = svNew;
    cl->svals[cloff + 1] = 0;
+   cl->dirty = True;
    return;
   slowcase: /* misaligned */
    stats__cline_16to8splits++;
@@ -4604,6 +4651,7 @@
    cl->svals[cloff + 1] = 0;
    cl->svals[cloff + 2] = 0;
    cl->svals[cloff + 3] = 0;
+   cl->dirty = True;
    return;
   slowcase: /* misaligned */
    stats__cline_32to16splits++;
@@ -4629,6 +4677,7 @@
    cl->svals[cloff + 5] = 0;
    cl->svals[cloff + 6] = 0;
    cl->svals[cloff + 7] = 0;
+   cl->dirty = True;
    return;
   slowcase: /* misaligned */
    stats__cline_64to32splits++;
@@ -5007,6 +5056,7 @@
         if (!sm->mbHasShared)
            continue;
         stats_SMs_scanned++;
+        Bool anySh = False;
         initSecMapIter( &itr );
         while (stepSecMapIter( &w32p, &itr, sm )) {
            Bool isM;
@@ -5023,7 +5073,13 @@
                           : mk_SHVAL_ShR(tset_old, lset_new);
            if (wnew != wold)
               *w32p = wnew;
+           anySh = True;
         }
+	if (!anySh) {
+           VG_(printf)("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX discr\n");
+           tl_assert(sm->mbHasShared);
+           sm->mbHasShared = False;
+	}
      }
      HG_(doneIterFM)( map_shmem );
      if (SHOW_EXPENSIVE_STUFF)
@@ -8846,6 +8902,12 @@
    tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
    /* also ... a CacheLine holds an integral number of trees */
    tl_assert(0 == (N_LINE_ARANGE % 8));
+
+   tl_assert(sizeof(UInt) == 4);
+   tl_assert(sizeof(ULong) == 8);
+
+   tl_assert(sizeof(WordSetID) == 4);
+   tl_assert(sizeof(SVal) == 8);
 }
 
 VG_DETERMINE_INTERFACE_VERSION(hg_pre_clo_init)
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Valgrind-developers mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/valgrind-developers

Reply via email to