Description:
We have a system with a group of processes accessing a region of
memory mapped into each process using mmap. These processes use locks
for synchronization and generally read and write to the region of
memory. We have isolated a problem where when one of the processes
exits, or merely unmaps the region of memory, an older value can end
up overwriting a newer value written by another process. It is as if munmap
temporarily disconnects cache coherency and then flushes a stale
value from the cache of the processor doing the munmap.
Please see the enclosed test program.
-----
Keywords: SMP, kernel, memory management, mmap
-----
Kernel version:
Linux version 2.2.10 ([EMAIL PROTECTED]) (gcc version egcs-2.91.66 19990314/Linux
(egcs-1.1.2 release)) #4 SMP Thu Jul 29 14:24:44 PDT 1999
Also happens on RedHat 6.0 2.2.5-22smp
-----
Test program:
Assuming the following is copied into a file called locktest.cpp,
compile with:
gcc locktest.cpp -o locktest
Invoke locktest with no arguments. It will either quickly produce output
with the word "ERROR" or run for a minute or two and end with "Terminated"
or no output at all. If ERROR does not appear, the program has executed
correctly.
The preprocessor #define DO_MUNMAP can be changed to 0 to provide a sanity
check that the locking logic is not broken. It should serve as a control
variable for the bug. Note that this is a simplified version of another
test program we wrote that uses many processes. From results with the other
program, we're pretty sure it's the munmap, not the mmap, that is causing
the problem. I can make this other program available if need be.
=====Begin locktest.cpp=====
#include <sys/file.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <pwd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <signal.h>
static pid_t parentPID = 0; // Process ID of the parent process (recorded in main before fork).
static pid_t childPID = 0; // Process ID of the forked child; assigned only in the parent, stays 0 in the child.
static pid_t ourPID = 0; // Process ID of this process (recorded after fork); used as the lock owner tag.
static int poolfd = -1; // File descriptor of the pool file "locktest_pool"; -1 until SetupSharedPool opens it.
static char* sharedBase; // Base address for the shared memory area
static long sharedLen; // Length of the shared memory area, in bytes (sizeof(GlobalInfo))
static int numSpins = 0; // Loop iterations completed by this process; reported in CHECKRESULT messages.
/* Abort the test when a check fails.
 *
 *   ok    - condition that must hold; evaluated exactly once.
 *   label - C string naming the check; incorporated into the error message.
 *
 * If `ok` is false, prints a diagnostic to stderr (errno, pid, whether we
 * are the parent or the child, spin count and label), sends SIGTERM to the
 * peer process so it does not keep spinning on its own, and exits with
 * status 1.
 *
 * The peer is only signalled when its PID is known (> 0): kill() with a pid
 * of 0 would signal every process in our process group, ourselves included.
 *
 * This is intentionally a plain `if { }` statement rather than the usual
 * do { } while (0) wrapper, because most call sites invoke it without a
 * trailing semicolon. Do not use it as the body of an `if` that has an
 * `else`.
 */
#define CHECKRESULT(ok, label) \
if (!(ok)) \
{ \
    const pid_t checkPid = getpid(); \
    const int checkIsParent = (checkPid == parentPID); \
    const char* checkDesc = checkIsParent ? "parent" : "child"; \
    const pid_t checkPeer = checkIsParent ? childPID : parentPID; \
    fprintf( stderr, "ERROR, errno=%d, pid=%d (%s) after %d spins: %s\n", \
             errno, (int)checkPid, checkDesc, numSpins, (label)); \
    if (checkPeer > 0) \
        kill(checkPeer, SIGTERM); \
    exit(1); \
}
/*************************************************************************/
/********************************* Locks *********************************/
/*************************************************************************/
// Test-and-set primitive for the spin lock (x86 only, GCC inline assembly).
//
// Stores 1 into *location and returns the value that was there before:
//   0     -> the lock was free and the caller now holds it;
//   non-0 -> the lock was already held by someone else.
//
// The swap is done with a single `xchgl` instruction; on x86 an xchg with a
// memory operand is implicitly locked, which is what makes this usable
// between processes on different CPUs.
static int TestAndSet(volatile int *location)
{
int was_locked;
// %0 is a register that enters holding 1 and leaves holding the old value;
// %1 is the lock word in memory.
__asm__ __volatile__ ("xchgl %0, %1"
: "=r"(was_locked), "=m"(*location) // outputs: old value, updated lock word
: "0"(1), "m"(*location) // inputs: constant 1 placed in the same register as %0
: "memory"); // compiler barrier: do not cache memory values across the swap
return was_locked;
} // TestAndSet
// A minimal spin lock intended to live inside the shared memory region.
// It has no constructor; the process that creates the region calls Init().
class LockInfo
{
    // State (private by default). The order of these two fields defines the
    // in-memory layout shared by all processes, so keep it as is.
    volatile int lock;    // 1 while the lock is held, 0 when it is free.
    volatile pid_t owner; // PID of the holder, or 0 when nobody holds it.

public:
    void Init();   // Mark the lock free and unowned.
    void Lock();   // Spin until acquired, then record ourselves as owner.
    void Unlock(); // Clear the owner and release the lock.
};
void LockInfo::Init()
{
lock = 0;
owner = 0;
} // Init
void LockInfo::Lock()
{
CHECKRESULT(owner != ourPID, "LockInfo::Lock sanity test")
while (TestAndSet(&lock))
;
CHECKRESULT(lock == 1, "LockInfo::Lock lock==1 test")
CHECKRESULT(owner == 0, "LockInfo::Lock owner==0 test")
owner = ourPID;
CHECKRESULT(owner == ourPID, "LockInfo::Lock owner==ourPID test")
CHECKRESULT(lock == 1, "LockInfo::Lock second lock==1 test")
} // Lock
// Release the lock. Must only be called by the process that holds it.
//
// Both preconditions are verified with CHECKRESULT, which prints an error
// and terminates the test on failure. The labels name Unlock so a failure
// here is not mistaken for one in Lock().
void LockInfo::Unlock()
{
    CHECKRESULT(lock == 1, "LockInfo::Unlock lock==1 test")
    CHECKRESULT(owner == ourPID, "LockInfo::Unlock owner==ourPID test")

    // Clear the owner before releasing the lock word: the next process to
    // acquire the lock checks owner == 0 immediately after its TestAndSet.
    owner = 0;
    lock = 0;
} // Unlock
/*************************************************************************/
/****************************** Test driver ******************************/
/*************************************************************************/
// Layout of the shared memory region: a spin lock followed by the counter
// that the lock protects.
struct GlobalInfo
{
    LockInfo lock;          // Guards theAnswer.
    volatile int theAnswer; // Shared counter incremented by both processes.
};

// Points at the mapped region; assigned in SetupSharedPool.
GlobalInfo *globals;
// Upper bound on the shared counter (globals->theAnswer). Once a process
// sees the counter exceed this value it sends SIGTERM to the other process
// and exits with status 0.
// The failure generally shows up after around 45 spins or so on
// a 450MHz PIII dual processor, so if the counter gets this
// far we assume it works.
#define MAXSPINS 2000000
static void IncrementAnswer()
{
globals->lock.Lock();
globals->theAnswer++;
globals->lock.Unlock();
if (globals->theAnswer > MAXSPINS)
{
kill(parentPID, SIGTERM);
exit(0);
}
}
// Parent's unit of work: detect a write that gets undone behind our back.
//
// With the lock held, bumps the shared counter and then re-reads it 100
// times. Nobody else may write while we hold the lock, so every read must
// still be greater than the value we started from; otherwise CHECKRESULT
// reports "value reverted" and terminates the test.
//
// After releasing the lock, ends the test (SIGTERM to the child, exit 0)
// once the counter has passed MAXSPINS.
static void TestConsistency()
{
    globals->lock.Lock();

    const int before = globals->theAnswer;
    globals->theAnswer = before + 1;

    int remaining = 100;
    while (remaining-- > 0)
    {
        CHECKRESULT(globals->theAnswer > before, "value reverted")
    }

    globals->lock.Unlock();

    if (globals->theAnswer <= MAXSPINS)
        return;

    kill(childPID, SIGTERM);
    exit(0);
}
static void SetupSharedPool()
{
sharedLen = sizeof(GlobalInfo);
// Open the pool file.
poolfd = ::open("locktest_pool", O_CREAT|O_RDWR, 0700);
CHECKRESULT(poolfd != -1, "opening locktest_pool")
int result = ::flock(poolfd, LOCK_EX);
CHECKRESULT(result == 0, "locking pool file")
/* Map the pool file into memory.
*
* ALERT: This constant 0x50000000 should be "discovered"
* somehow. Probably by reading /proc/self/maps and snagging the
* largest hole...
*/
sharedBase = (char*) 0x50000000;
struct stat sbuf;
result = fstat (poolfd, &sbuf);
CHECKRESULT(result == 0, "fstat on pool file")
int firstTime;
if (sbuf.st_size <= 0)
{
// First time through, need to initialize the shared area.
char buf[1];
buf[0] = 255;
off_t lseekResult = lseek(poolfd, sharedLen - 1, SEEK_SET);
CHECKRESULT(lseekResult != -1, "lseek to grow pool file")
result = write(poolfd, &buf, 1);
CHECKRESULT(result == 1, "writing dummy byte at end of pool file")
firstTime = 1;
}
else
{
// Second time through, just map the existing shared area into place.
firstTime = 0;
}
void* mmap_result = mmap( sharedBase, sharedLen, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, poolfd, 0 );
CHECKRESULT(mmap_result != MAP_FAILED, "mmap returned MAP_FAILED")
CHECKRESULT(mmap_result == (void*)sharedBase, "mmap didn't return sharedBase")
globals = (GlobalInfo*)sharedBase;
if (firstTime)
{
// Initialize the lock object.
globals->lock.Init();
globals->theAnswer = 0;
}
result = ::flock(poolfd, LOCK_UN);
CHECKRESULT(result == 0, "unlocking pool file")
} // SetupSharedPool
int main(int argc, char **argv)
{
parentPID = getpid();
/* Delete any old pool file. Note that we don't check the error code
* here, since the pool file might not have existed. We ought to check
* for other errors, oh well.
*/
unlink("locktest_pool");
pid_t forkResult;
forkResult = fork();
CHECKRESULT(forkResult != -1, "First fork()");
if (forkResult == 0)
{
// We are the child.
ourPID = getpid();
#define DO_MUNMAP 1
#if !DO_MUNMAP
SetupSharedPool();
#endif
while (1)
{
#if DO_MUNMAP
SetupSharedPool();
#endif
IncrementAnswer();
#if DO_MUNMAP
if (munmap(sharedBase, sharedLen) < 0)
fprintf(stderr, "munmap failed.\n");
#endif
numSpins++;
close(poolfd);
}
}
else
childPID = forkResult;
// We are the child.
ourPID = getpid();
SetupSharedPool();
while (1)
{
TestConsistency();
numSpins++;
}
return 0;
} // main
=====End locktest.cpp=====
-----
Output of ver_linux:
Linux workingpages.com 2.2.10 #4 SMP Thu Jul 29 14:24:44 PDT 1999 i686 unknown
Kernel modules 2.1.121
Gnu C egcs-2.91.66
Binutils 2.9.1.0.23
Linux C Library 2.1.1
Dynamic linker ldd (GNU libc) 2.1.1
Procps 2.0.2
Mount 2.9o
Net-tools 1.51
Console-tools 1999.03.02
Sh-utils 1.16
Modules Loaded nfsd nfs lockd sunrpc raid1
-----
Contents of /proc/cpuinfo:
processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 7
model name : 00/07
stepping : 2
cpu MHz : 451.027804
cache size : 512 KB
fdiv_bug : no
hlt_bug : no
sep_bug : no
f00f_bug : no
coma_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 3
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat
pse36 psn mmx osfxsr kni
bogomips : 448.92
processor : 1
vendor_id : GenuineIntel
cpu family : 6
model : 7
model name : 00/07
stepping : 2
cpu MHz : 451.027804
cache size : 512 KB
fdiv_bug : no
hlt_bug : no
sep_bug : no
f00f_bug : no
coma_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 3
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat
pse36 psn mmx osfxsr kni
bogomips : 450.56
-----
-
Linux SMP list: FIRST see FAQ at http://www.irisa.fr/prive/mentre/smp-faq/
To Unsubscribe: send "unsubscribe linux-smp" to [EMAIL PROTECTED]