> [...] change SVal to 64-bit at some point (I vote for it!),
I already did that in Nov 07, but did not commit due to the increase
in run time and space usage. The resulting patch is attached.
It allows up to 24 bits for lock- and thread-set IDs. I can't
remember now why I limited it to 24 bits -- could go up to about
30 bits.
Really, 64 bits for an SVal is too much, but 32 bits is not enough.
Something like 48 bits would be a good compromise, but there is
no sensible way to do that in C. Oh well.
The patch also changes CacheLine to have a dirty bit in an attempt
to reduce the performance overhead from writing back cache lines.
That helps, although it also adds to the complexity and overhead
of verifying that the shadow memory system is functioning
correctly.
J
Index: helgrind/hg_main.c
===================================================================
--- helgrind/hg_main.c (revision 7350)
+++ helgrind/hg_main.c (working copy)
@@ -90,7 +90,7 @@
// shadow_mem_make_NoAccess: 29156 SMs, 1728 scanned
// happens_before_wrk: 1000
// ev__post_thread_join: 3360 SMs, 29 scanned, 252 re-Excls
-#define SHOW_EXPENSIVE_STUFF 0
+#define SHOW_EXPENSIVE_STUFF 1
// 0 for silent, 1 for some stuff, 2 for lots of stuff
#define SHOW_EVENTS 0
@@ -113,7 +113,6 @@
#if 0
# define SCE_CACHELINE 1 /* do sanity-check CacheLine stuff */
# define inline __attribute__((noinline))
- /* probably want to ditch -fomit-frame-pointer too */
#else
# define SCE_CACHELINE 0 /* don't sanity-check CacheLine stuff */
#endif
@@ -239,7 +238,7 @@
/*----------------------------------------------------------------*/
/* Shadow values. */
-typedef UInt SVal;
+typedef ULong SVal;
/* These are handles for thread segments. CONSTRAINTS: Must be small
@@ -357,14 +356,15 @@
/* ------ CacheLine ------ */
-#define N_LINE_BITS 5 /* must be >= 3 */
+#define N_LINE_BITS 6 /* must be >= 3 */
#define N_LINE_ARANGE (1 << N_LINE_BITS)
#define N_LINE_TREES (N_LINE_ARANGE >> 3)
typedef
struct {
+ SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
UShort descrs[N_LINE_TREES];
- SVal svals[N_LINE_ARANGE]; // == N_LINE_TREES * 8
+ Bool dirty;
}
CacheLine;
@@ -422,7 +422,7 @@
Each SecMap must hold a power-of-2 number of CacheLines. Hence
N_SECMAP_BITS must >= N_LINE_BITS.
*/
-#define N_SECMAP_BITS 13
+#define N_SECMAP_BITS 12
#define N_SECMAP_ARANGE (1 << N_SECMAP_BITS)
// # CacheLines held by a SecMap
@@ -484,7 +484,7 @@
if (itr->word_no == N_LINE_ARANGE)
itr->word_no = 0;
} else {
- tl_assert(itr->word_no >= 0 && itr->word_no <= 3);
+ tl_assert(itr->word_no >= 0 && itr->word_no < 4);
tl_assert(lineZ->dict[itr->word_no] != 0);
*pVal = &lineZ->dict[itr->word_no];
itr->word_no++;
@@ -884,26 +884,29 @@
/* Shadow value encodings:
- 11 WordSetID:TSID_BITS WordSetID:LSID_BITS ShM thread-set lock-set
- 10 WordSetID:TSID_BITS WordSetID:LSID_BITS ShR thread-set lock-set
- 01 TSegmentID:30 Excl thread-segment
- 00 0--(20)--0 10 0000 0000 New
- 00 0--(20)--0 01 0000 0000 NoAccess
- 00 0--(20)--0 00 0000 0000 Invalid
+ 11 00 0000 WordSetID:24 0000 0000 WordSetID:24 ShM thread-set lock-set
+ 10 00 0000 WordSetID:24 0000 0000 WordSetID:24 ShR thread-set lock-set
+ 01 0--(30)--0 0000 0000 TSegmentID:24 Excl thread-segment
+ 00 0--(30)--0 0--(22)--0 10 0000 0000 New
+ 00 0--(30)--0 0--(22)--0 01 0000 0000 NoAccess
+ 00 0--(30)--0 0--(22)--0 00 0000 0000 Invalid
- TSID_BITS + LSID_BITS must equal 30.
The elements in thread sets are Thread*, casted to Word.
The elements in lock sets are Lock*, casted to Word.
*/
-#define N_LSID_BITS 17
-#define N_LSID_MASK ((1 << (N_LSID_BITS)) - 1)
-#define N_LSID_SHIFT 0
+#define N_LSID_BITS 24 /* do not change this */
+#define N_LSID_MASK ((1UL << (N_LSID_BITS)) - 1)
+#define N_LSID_SHIFT 0
-#define N_TSID_BITS (30 - (N_LSID_BITS))
-#define N_TSID_MASK ((1 << (N_TSID_BITS)) - 1)
-#define N_TSID_SHIFT (N_LSID_BITS)
+#define N_TSID_BITS 24 /* do not change this */
+#define N_TSID_MASK ((1 << (N_TSID_BITS)) - 1)
+#define N_TSID_SHIFT 32
+#define N_SEGID_BITS 24 /* do not change this */
+#define N_SEGID_MASK ((1UL << (N_SEGID_BITS)) - 1)
+#define N_SEGID_SHIFT 0
+
static inline Bool is_sane_WordSetID_LSet ( WordSetID wset ) {
return wset >= 0 && wset <= N_LSID_MASK;
}
@@ -911,7 +914,6 @@
return wset >= 0 && wset <= N_TSID_MASK;
}
-
__attribute__((noinline))
__attribute__((noreturn))
static void mk_SHVAL_fail ( WordSetID tset, WordSetID lset, HChar* who ) {
@@ -929,8 +931,9 @@
static inline SVal mk_SHVAL_ShM ( WordSetID tset, WordSetID lset ) {
if (LIKELY(is_sane_WordSetID_TSet(tset)
&& is_sane_WordSetID_LSet(lset))) {
- return (SVal)( (3<<30) | (tset << N_TSID_SHIFT)
- | (lset << N_LSID_SHIFT));
+ return (((SVal)3) << 62)
+ | (((SVal)tset) << N_TSID_SHIFT)
+ | (((SVal)lset) << N_LSID_SHIFT);
} else {
mk_SHVAL_fail(tset, lset, "mk_SHVAL_ShM");
}
@@ -938,31 +941,32 @@
static inline SVal mk_SHVAL_ShR ( WordSetID tset, WordSetID lset ) {
if (LIKELY(is_sane_WordSetID_TSet(tset)
&& is_sane_WordSetID_LSet(lset))) {
- return (SVal)( (2<<30) | (tset << N_TSID_SHIFT)
- | (lset << N_LSID_SHIFT) );
+ return (((SVal)2) << 62)
+ | (((SVal)tset) << N_TSID_SHIFT)
+ | (((SVal)lset) << N_LSID_SHIFT);
} else {
mk_SHVAL_fail(tset, lset, "mk_SHVAL_ShR");
}
}
static inline SVal mk_SHVAL_Excl ( SegmentID tseg ) {
tl_assert(is_sane_SegmentID(tseg));
- return (SVal)( (1<<30) | tseg );
+ return (((SVal)1) << 62) | (((SVal)tseg) << N_SEGID_SHIFT);
}
-#define SHVAL_New ((SVal)(2<<8))
-#define SHVAL_NoAccess ((SVal)(1<<8))
-#define SHVAL_Invalid ((SVal)(0<<8))
+#define SHVAL_New (((SVal)2)<<8)
+#define SHVAL_NoAccess (((SVal)1)<<8)
+#define SHVAL_Invalid (((SVal)0)<<8)
static inline Bool is_SHVAL_ShM ( SVal w32 ) {
- return (w32 >> 30) == 3;
+ return (w32 >> 62) == 3;
}
static inline Bool is_SHVAL_ShR ( SVal w32 ) {
- return (w32 >> 30) == 2;
+ return (w32 >> 62) == 2;
}
static inline Bool is_SHVAL_Sh ( SVal w32 ) {
- return (w32 >> 31) == 1;
+ return (w32 >> 63) == 1;
}
static inline Bool is_SHVAL_Excl ( SVal w32 ) {
- return (w32 >> 30) == 1;
+ return (w32 >> 62) == 1;
}
static inline Bool is_SHVAL_New ( SVal w32 ) {
return w32 == SHVAL_New;
@@ -977,7 +981,7 @@
static inline SegmentID un_SHVAL_Excl ( SVal w32 ) {
tl_assert(is_SHVAL_Excl(w32));
- return w32 & ~(3<<30);
+ return w32 & ~(((SVal)3)<<62);
}
static inline WordSetID un_SHVAL_ShR_tset ( SVal w32 ) {
tl_assert(is_SHVAL_ShR(w32));
@@ -3492,6 +3496,7 @@
VG_(printf)("pp_CacheLine(NULL)\n");
return;
}
+ VG_(printf)(" dirty: %d\n", (Int)cl->dirty);
for (i = 0; i < N_LINE_TREES; i++)
VG_(printf)(" descr: %04lx\n", (UWord)cl->descrs[i]);
for (i = 0; i < N_LINE_ARANGE; i++)
@@ -3685,6 +3690,7 @@
Word tno, cloff;
if (!cl) goto bad;
+ if (cl->dirty != False && cl->dirty != True) goto bad;
for (tno = 0, cloff = 0; tno < N_LINE_TREES; tno++, cloff += 8) {
UShort descr = cl->descrs[tno];
@@ -3866,6 +3872,10 @@
if (!is_valid_scache_tag(tag))
return;
+ /* Or there may be no point in writing it. */
+ if (!cl->dirty)
+ return;
+
/* Where are we going to put it? */
sm = NULL;
lineZ = NULL;
@@ -3931,6 +3941,7 @@
if (anyShared)
sm->mbHasShared = True;
+ cl->dirty = False;
/* mb_tidy_one_cacheline(); */
}
@@ -3974,7 +3985,7 @@
for (i = 0; i < N_LINE_ARANGE; i++) {
SVal sv;
UWord ix = read_twobit_array( lineZ->ix2s, i );
- tl_assert(ix >= 0 && ix <= 3);
+ tl_assert(ix >= 0 && ix < 4);
sv = lineZ->dict[ix];
tl_assert(sv != 0);
cl->svals[i] = sv;
@@ -3982,6 +3993,7 @@
stats__cache_Z_fetches++;
}
normalise_CacheLine( cl );
+ cl->dirty = False;
}
static void shmem__invalidate_scache ( void ) {
@@ -3990,6 +4002,7 @@
tl_assert(!is_valid_scache_tag(1));
for (wix = 0; wix < N_WAY_NENT; wix++) {
cache_shmem.tags0[wix] = 1/*INVALID*/;
+ cache_shmem.lyns0[wix].dirty = False;
}
stats__cache_invals++;
}
@@ -4277,7 +4290,10 @@
}
svOld = cl->svals[cloff];
svNew = msm__handle_read( thr_acc, a, svOld, 1 );
- cl->svals[cloff] = svNew;
+ if (svNew != svOld) {
+ cl->svals[cloff] = svNew;
+ cl->dirty = True;
+ }
}
static void shadow_mem_read16 ( Thread* thr_acc, Addr a, SVal uuOpaque ) {
CacheLine* cl;
@@ -4303,7 +4319,10 @@
}
svOld = cl->svals[cloff];
svNew = msm__handle_read( thr_acc, a, svOld, 2 );
- cl->svals[cloff] = svNew;
+ if (svNew != svOld) {
+ cl->svals[cloff] = svNew;
+ cl->dirty = True;
+ }
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_16to8splits++;
@@ -4335,7 +4354,10 @@
}
svOld = cl->svals[cloff];
svNew = msm__handle_read( thr_acc, a, svOld, 4 );
- cl->svals[cloff] = svNew;
+ if (svNew != svOld) {
+ cl->svals[cloff] = svNew;
+ cl->dirty = True;
+ }
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_32to16splits++;
@@ -4346,6 +4368,7 @@
static void shadow_mem_read32 ( Thread* thr_acc, Addr a, SVal uuOpaque ) {
CacheLine* cl;
UWord cloff, tno, toff;
+ SVal svOld, svNew;
UShort descr;
stats__cline_read32s++;
if (UNLIKELY(!aligned32(a))) goto slowcase;
@@ -4355,8 +4378,11 @@
toff = get_tree_offset(a); /* == 0 or 4 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) goto slowcase;
- { SVal* p = &cl->svals[cloff];
- *p = msm__handle_read( thr_acc, a, *p, 4 );
+ svOld = cl->svals[cloff];
+ svNew = msm__handle_read( thr_acc, a, svOld, 4 );
+ if (svNew != svOld) {
+ cl->svals[cloff] = svNew;
+ cl->dirty = True;
}
return;
slowcase: /* misaligned, or not at this level in the tree */
@@ -4381,7 +4407,10 @@
}
svOld = cl->svals[cloff];
svNew = msm__handle_read( thr_acc, a, svOld, 8 );
- cl->svals[cloff] = svNew;
+ if (svNew != svOld) {
+ cl->svals[cloff] = svNew;
+ cl->dirty = True;
+ }
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_64to32splits++;
@@ -4408,7 +4437,10 @@
}
svOld = cl->svals[cloff];
svNew = msm__handle_write( thr_acc, a, svOld, 1 );
- cl->svals[cloff] = svNew;
+ if (svNew != svOld) {
+ cl->svals[cloff] = svNew;
+ cl->dirty = True;
+ }
}
static void shadow_mem_write16 ( Thread* thr_acc, Addr a, SVal uuOpaque ) {
CacheLine* cl;
@@ -4434,7 +4466,10 @@
}
svOld = cl->svals[cloff];
svNew = msm__handle_write( thr_acc, a, svOld, 2 );
- cl->svals[cloff] = svNew;
+ if (svNew != svOld) {
+ cl->svals[cloff] = svNew;
+ cl->dirty = True;
+ }
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_16to8splits++;
@@ -4466,7 +4501,10 @@
}
svOld = cl->svals[cloff];
svNew = msm__handle_write( thr_acc, a, svOld, 4 );
- cl->svals[cloff] = svNew;
+ if (svNew != svOld) {
+ cl->svals[cloff] = svNew;
+ cl->dirty = True;
+ }
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_32to16splits++;
@@ -4477,6 +4515,7 @@
static void shadow_mem_write32 ( Thread* thr_acc, Addr a, SVal uuOpaque ) {
CacheLine* cl;
UWord cloff, tno, toff;
+ SVal svOld, svNew;
UShort descr;
stats__cline_write32s++;
if (UNLIKELY(!aligned32(a))) goto slowcase;
@@ -4486,8 +4525,11 @@
toff = get_tree_offset(a); /* == 0 or 4 */
descr = cl->descrs[tno];
if (UNLIKELY( !(descr & (TREE_DESCR_32_0 << toff)) )) goto slowcase;
- { SVal* p = &cl->svals[cloff];
- *p = msm__handle_write( thr_acc, a, *p, 4 );
+ svOld = cl->svals[cloff];
+ svNew = msm__handle_write( thr_acc, a, svOld, 4 );
+ if (svNew != svOld) {
+ cl->svals[cloff] = svNew;
+ cl->dirty = True;
}
return;
slowcase: /* misaligned, or must go further down the tree */
@@ -4512,7 +4554,10 @@
}
svOld = cl->svals[cloff];
svNew = msm__handle_write( thr_acc, a, svOld, 8 );
- cl->svals[cloff] = svNew;
+ if (svNew != svOld) {
+ cl->svals[cloff] = svNew;
+ cl->dirty = True;
+ }
return;
slowcase: /* misaligned, or must go further down the tree */
stats__cline_64to32splits++;
@@ -4537,6 +4582,7 @@
tl_assert(is_sane_CacheLine(cl)); /* EXPENSIVE */
}
cl->svals[cloff] = svNew;
+ cl->dirty = True;
}
static void shadow_mem_set16 ( Thread* uu_thr_acc, Addr a, SVal svNew ) {
CacheLine* cl;
@@ -4567,6 +4613,7 @@
}
cl->svals[cloff + 0] = svNew;
cl->svals[cloff + 1] = 0;
+ cl->dirty = True;
return;
slowcase: /* misaligned */
stats__cline_16to8splits++;
@@ -4604,6 +4651,7 @@
cl->svals[cloff + 1] = 0;
cl->svals[cloff + 2] = 0;
cl->svals[cloff + 3] = 0;
+ cl->dirty = True;
return;
slowcase: /* misaligned */
stats__cline_32to16splits++;
@@ -4629,6 +4677,7 @@
cl->svals[cloff + 5] = 0;
cl->svals[cloff + 6] = 0;
cl->svals[cloff + 7] = 0;
+ cl->dirty = True;
return;
slowcase: /* misaligned */
stats__cline_64to32splits++;
@@ -5007,6 +5056,7 @@
if (!sm->mbHasShared)
continue;
stats_SMs_scanned++;
+ Bool anySh = False;
initSecMapIter( &itr );
while (stepSecMapIter( &w32p, &itr, sm )) {
Bool isM;
@@ -5023,7 +5073,13 @@
: mk_SHVAL_ShR(tset_old, lset_new);
if (wnew != wold)
*w32p = wnew;
+ anySh = True;
}
+ if (!anySh) {
+ VG_(printf)("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX discr\n");
+ tl_assert(sm->mbHasShared);
+ sm->mbHasShared = False;
+ }
}
HG_(doneIterFM)( map_shmem );
if (SHOW_EXPENSIVE_STUFF)
@@ -8846,6 +8902,12 @@
tl_assert(0 == (N_SECMAP_ARANGE % N_LINE_ARANGE));
/* also ... a CacheLine holds an integral number of trees */
tl_assert(0 == (N_LINE_ARANGE % 8));
+
+ tl_assert(sizeof(UInt) == 4);
+ tl_assert(sizeof(ULong) == 8);
+
+ tl_assert(sizeof(WordSetID) == 4);
+ tl_assert(sizeof(SVal) == 8);
}
VG_DETERMINE_INTERFACE_VERSION(hg_pre_clo_init)
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
Valgrind-developers mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/valgrind-developers