We just tracked down an entertaining bug that infests the
    Q.E.D. RM52xx series and Linux.

    A close examination of the QED docs shows that the 52xx
    series uses 2 low order bits of the virtual address to
    index the on-chip cache.  QED thought this information was
    important enough that they mentioned it exactly once, in
    the table showing the Cache Error Register - register 27.
    In the rev 3.0 doc that we have lying around, this is in
    table 5.9.
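
    With 4 kbyte pages, that means two virtual addresses only share a
    cache index if they agree modulo 16 kbytes.  The little sketch
    below is ours, not from the QED docs (it assumes the two index
    bits sit just above the page offset, which is what the 0x3fff
    mask in the fix below implies; "CACHE_COLOR" is just a name we
    made up):

#include <stdio.h>

/* Two virtual-index bits just above the 4 kbyte page offset. */
#define CACHE_COLOR(va)     (((unsigned long) (va) >> 12) & 0x3)

int main(void)
{
    unsigned long a = 0x10000000;    /* 16 kbyte aligned                */
    unsigned long b = a + 0x1000;    /* 4 kbytes later: different color */

    printf("color(a) = %lu  color(b) = %lu\n",
           CACHE_COLOR(a), CACHE_COLOR(b));

    /*
     * If a and b were two mappings of the same physical page, a store
     * through one could sit in a cache line the other never indexes.
     */
    return 0;
}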

    The bug is that if a shared region is mapped at addresses that
    are not 16 kbyte aligned, two mappings of the same page can land
    on different cache indexes, so a write through one mapping may
    not be visible through the other.  This doesn't affect pthreads,
    because they tend to load the same shared libraries in the same
    order, but other programs that share mapped regions have problems.

    Entertainingly, Linux has no provision for virtually tagged
    caches.  This seems to be true in our 2.0.34 base and, based
    on inspection, 2.2.12.

    I've included our test program and 2.0.34 fix below.  Ideally,
    Linux would have a concept of "platform specific" that is
    distinct from "processor specific".  In the current layout, I
    suppose the mips specific code could probe the processor and
    dynamically set up the virtual cache mode.  Our solution uses
    'CONFIG_QED_RM52XX'.
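
    For what it's worth, the CACHE_ALIGN macro in the patch just
    rounds an address up to the next 16 kbyte boundary.  A
    stand-alone check (the address below is made up purely for
    illustration):

#include <stdio.h>

/* Same definition as in the include/asm-mips/processor.h hunk below. */
#define CACHE_ALIGN(addr)     (((addr) + 0x3fff) & ~0x3fff)

int main(void)
{
    unsigned long a = 0x2aaab123;    /* arbitrary unaligned address */

    /* Prints 0x2aaab123 -> 0x2aaac000, the next 16 kbyte boundary. */
    printf("0x%lx -> 0x%lx\n", a, CACHE_ALIGN(a));
    return 0;
}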

    cj*

============================================================
Test program:
    cc -o mmap_test mmap_test.c
    mmap_test [align_gap_in_kbytes]
===============

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

int main(int ac, char **av)
{
    int fd;
    void *base, *base2;
    int mult = 4;
    int len = 64 * 1024;
    char buf[64];


    if (ac > 1)
        mult = atoi(av[1]);

    if ((fd = open("/tmp/foo", O_RDWR|O_CREAT, 0777)) < 0) {
        perror("open failed");
        exit(42);
    }

    memset(buf, 0, sizeof(buf));

    if (write(fd, buf, sizeof(buf)) != sizeof(buf)) {
        perror("write failed");
        exit(42);
    }

    base = mmap(0, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

    if (base == MAP_FAILED) {
        perror("bad base");
        exit(42);
    }

    printf("base1 %p\n", base);

    base2 = mmap((char *) base + len + mult * 1024, len,
                 PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

    if (base2 == MAP_FAILED) {
        perror("bad base2");
        exit(42);
    }

    printf("base2 %p\n", base2);

    /*
     * Store through the first mapping, then read back through the
     * second; on a misaligned virtually indexed cache the store can
     * sit in a cache line the second mapping never indexes.
     */
    * (int *) base = 42;

    if (* (int *) base2 != 42) {
        printf("failed\n");
    }
    else printf("passed\n");

    return 0;
}

============================================================
Fix - 2.0.34
2.2 and 2.3 left as an exercise.
===============
RCS file: /fargo/cvs/linux/mm/mmap.c,v
retrieving revision 1.9
retrieving revision 1.11
diff -c -6 -r1.9 -r1.11
*** mm/mmap.c   1998/06/13 11:00:32     1.9
--- mm/mmap.c   1999/10/01 00:06:55     1.11
***************
*** 331,363 ****
                } while (len > 0);
        }
        return addr;
  }
  
  /*
   * Get an address range which is currently unmapped.
   * For mmap() without MAP_FIXED and shmat() with addr=0.
   * Return value 0 means ENOMEM.
   */
  unsigned long get_unmapped_area(unsigned long addr, unsigned long len)
  {
        struct vm_area_struct * vmm;
  
        if (len > MAX_USER_ADDR)
                return 0;
        if (!addr)
                addr = MMAP_SEARCH_START;
-       addr = PAGE_ALIGN(addr);
  
        for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
                /* At this point:  (!vmm || addr < vmm->vm_end). */
                if (MAX_USER_ADDR - len < addr)
                        return 0;
                if (!vmm || addr + len <= vmm->vm_start)
                        return addr;
!               addr = vmm->vm_end;
        }
  }
  
  /*
   * Searching a VMA in the linear list task->mm->mmap is horribly slow.
   * Use an AVL (Adelson-Velskii and Landis) tree to speed up this search
--- 333,377 ----
                } while (len > 0);
        }
        return addr;
  }
  
  /*
+  * CACHE_ALIGN is cache specific to deal with virtually tagged caches.
+  * For most systems, PAGE_ALIGN is sufficient, but some processors or
+  * board designs may be cache impaired.
+  */
+ 
+ #ifndef CACHE_ALIGN
+ #define CACHE_ALIGN PAGE_ALIGN
+ #endif
+ 
+ /*
   * Get an address range which is currently unmapped.
   * For mmap() without MAP_FIXED and shmat() with addr=0.
   * Return value 0 means ENOMEM.
   */
  unsigned long get_unmapped_area(unsigned long addr, unsigned long len)
  {
        struct vm_area_struct * vmm;
  
        if (len > MAX_USER_ADDR)
                return 0;
        if (!addr)
                addr = MMAP_SEARCH_START;
  
+       addr = CACHE_ALIGN(addr);
+ 
        for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
+ 
                /* At this point:  (!vmm || addr < vmm->vm_end). */
                if (MAX_USER_ADDR - len < addr)
                        return 0;
                if (!vmm || addr + len <= vmm->vm_start)
                        return addr;
!               addr = CACHE_ALIGN(vmm->vm_end);
        }
  }
  
  /*
   * Searching a VMA in the linear list task->mm->mmap is horribly slow.
   * Use an AVL (Adelson-Velskii and Landis) tree to speed up this search
*** include/asm-mips/processor.h 1998/06/18 05:52:23     1.7
--- include/asm-mips/processor.h 1999/09/30 22:47:50     1.8
***************
*** 48,53 ****
--- 48,62 ----
  #define TASK_SIZE     (0x80000000UL)
  #define MAX_USER_ADDR TASK_SIZE
  #define MMAP_SEARCH_START (TASK_SIZE/3)
+ 
+ #ifdef CONFIG_QED_RM52XX
+     /*
+      * QED 52xx family uses 2 bits of virtual address in cache tag,
+      * so we use the following macro in mm/mmap.c to adjust shared
+      * regions to work properly.
+      */
+ #define CACHE_ALIGN(addr)     (((addr) + 0x3fff) & ~0x3fff)
+ #endif
  
  /*
   * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
