Richard Cochran wrote:
> In any case, I found one huge error in the patch...you mangled my name
> ;)

Here is a better one, with that error fixed, as well as a fix in
switch_mm: we now really avoid flushing the cache when it is not
needed. The previous version flushed the cache at each switch because
of an error in the register we checked.

I have tested this version with Xenomai, and we get lower latencies (we
gained 100us).
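
For reviewers who do not have the FCSE documentation at hand: on these
CPUs (the patch hooks arm920, arm926 and xscale), every virtual
address below 32MB is relocated by ORing in the current PID register,
so each task lives in its own 32MB slot of the virtually indexed cache
and switching tasks no longer requires flushing the cache. Below is a
small stand-alone sketch of the va<->mva relocation that
fcse_va_to_mva() and fcse_mva_to_va() implement; it is plain
user-space C for illustration only, not part of the patch (the
constants mirror include/asm-arm/fcse.h):

/*
 * Illustration only: the FCSE va<->mva relocation, outside the kernel.
 * Build with any C compiler and run.
 */
#include <assert.h>
#include <stdio.h>

#define FCSE_PID_SHIFT		25
#define FCSE_PID_TASK_SIZE	(1UL << FCSE_PID_SHIFT)	/* 32MB */
#define FCSE_PID_MASK		(FCSE_PID_TASK_SIZE - 1)

/* What the CPU does: OR the PID into addresses below 32MB. */
static unsigned long va_to_mva(unsigned long pid, unsigned long va)
{
	return va < FCSE_PID_TASK_SIZE ? pid | va : va;
}

/* The reverse mapping, as applied to data abort addresses. */
static unsigned long mva_to_va(unsigned long pid, unsigned long mva)
{
	if (pid && pid == (mva & ~FCSE_PID_MASK))
		return mva & FCSE_PID_MASK;
	return mva;
}

int main(void)
{
	unsigned long pid = 3UL << FCSE_PID_SHIFT;	/* task using PID 3 */
	unsigned long va = 0x8000UL;
	unsigned long mva = va_to_mva(pid, va);

	printf("va 0x%08lx -> mva 0x%08lx\n", va, mva);
	assert(mva_to_va(pid, mva) == va);
	return 0;
}

This is also why the patch shrinks TASK_SIZE to 32MB: two tasks must
never occupy the same modified virtual address at the same time.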

-- 
                                            Gilles.
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index ad455ff..4481a30 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y		:= compat.o entry-armv.o entry-common.o irq.o \
 		   process.o ptrace.o setup.o signal.o \
 		   sys_arm.o stacktrace.o time.o traps.o
 
+obj-$(CONFIG_ARM_FCSE)		+= fcse.o
 obj-$(CONFIG_ISA_DMA_API)	+= dma.o
 obj-$(CONFIG_ARCH_ACORN)	+= ecard.o 
 obj-$(CONFIG_FIQ)		+= fiq.o
diff --git a/arch/arm/kernel/fcse.c b/arch/arm/kernel/fcse.c
new file mode 100644
index 0000000..1900a79
--- /dev/null
+++ b/arch/arm/kernel/fcse.c
@@ -0,0 +1,36 @@
+#include <linux/bitops.h>
+#include <linux/memory.h>
+#include <linux/spinlock.h>
+#include <asm/fcse.h>
+
+#define MAX_PID (MODULE_START / FCSE_PID_TASK_SIZE)
+#define PIDS_LONGS ((MAX_PID + 8 * sizeof(long) - 1) / (8 * sizeof(long)))
+
+static DEFINE_SPINLOCK(fcse_lock);
+static unsigned long fcse_pids_bits[PIDS_LONGS];
+
+int fcse_pid_alloc(void)
+{
+	unsigned long flags;
+	unsigned bit;
+
+	spin_lock_irqsave(&fcse_lock, flags);
+	bit = find_first_zero_bit(fcse_pids_bits, MAX_PID);
+	if (bit == MAX_PID) {
+		spin_unlock_irqrestore(&fcse_lock, flags);
+		return -1;
+	}
+	set_bit(bit, fcse_pids_bits);
+	spin_unlock_irqrestore(&fcse_lock, flags);
+
+	return bit;
+}
+
+void fcse_pid_free(unsigned pid)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&fcse_lock, flags);
+	clear_bit(pid, fcse_pids_bits);
+	spin_unlock_irqrestore(&fcse_lock, flags);
+}
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index eefae1d..b13d8a5 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -32,6 +32,7 @@
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 #include <asm/ptrace.h>
+#include <asm/fcse.h>
 
 /*
  * bitmask of present and online CPUs.
@@ -736,14 +737,14 @@ void flush_tlb_all(void)
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	cpumask_t mask = mm->cpu_vm_mask;
+	cpumask_t mask = fcse_tlb_mask(mm);
 
 	on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, 1, mask);
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 {
-	cpumask_t mask = vma->vm_mm->cpu_vm_mask;
+	cpumask_t mask = fcse_tlb_mask(vma->vm_mm);
 	struct tlb_args ta;
 
 	ta.ta_vma = vma;
@@ -764,7 +765,7 @@ void flush_tlb_kernel_page(unsigned long kaddr)
 void flush_tlb_range(struct vm_area_struct *vma,
                      unsigned long start, unsigned long end)
 {
-	cpumask_t mask = vma->vm_mm->cpu_vm_mask;
+	cpumask_t mask = fcse_tlb_mask(vma->vm_mm);
 	struct tlb_args ta;
 
 	ta.ta_vma = vma;
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index 0128687..b8d1605 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -61,7 +61,18 @@ inline long do_mmap2(
 
 	if (file)
 		fput(file);
+#ifdef CONFIG_ARM_FCSE
+	/* FIXME: this really sucks, we should also recheck in mremap,
+	   mprotect and munmap. */
+	if (likely((unsigned) error < (unsigned)(-4096))
+	    && (flags & MAP_SHARED) && (prot & PROT_WRITE)) {
+		struct vm_area_struct *vma = find_vma(current->mm, error);
+		if (vma && (pgprot_val(vma->vm_page_prot)
+			    & (L_PTE_CACHEABLE | L_PTE_BUFFERABLE)))
+			++current->mm->context.mappings_needing_flush;
+	}
+#endif
 out:
 	return error;
 }
 
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 33ed048..6c40ac0 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -716,3 +716,13 @@ config CACHE_L2X0
 	select OUTER_CACHE
 	help
 	  This option enables the L2x0 PrimeCell.
+
+config ARM_FCSE
+	bool "Fast Context Switch Extension (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  Say Y here to use the ARM Fast Context Switch Extension (FCSE).
+	  This avoids flushing the cache on context switch, at the cost of
+	  limiting each task to 32MB of virtual address space.
+
+	  If unsure, say N.
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 28ad7ab..b23d3c9 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -13,11 +13,13 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/kprobes.h>
+#include <linux/kallsyms.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
+#include <asm/fcse.h>
 
 #include "fault.h"
 
@@ -55,6 +57,10 @@ void show_pte(struct mm_struct *mm, unsigned long addr)
 	if (!mm)
 		mm = &init_mm;
 
+#ifdef CONFIG_ARM_FCSE
+	printk(KERN_ALERT "fcse pid: %lu, 0x%08lx\n",
+	       mm->context.pid >> FCSE_PID_SHIFT, mm->context.pid);
+#endif /* CONFIG_ARM_FCSE */
 	printk(KERN_ALERT "pgd = %p\n", mm->pgd);
 	pgd = pgd_offset(mm, addr);
 	printk(KERN_ALERT "[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
@@ -466,6 +472,8 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6);
 	struct siginfo info;
 
+	addr = fcse_mva_to_va(addr);
+
 	if (!inf->fn(addr, fsr, regs))
 		return;
 
@@ -484,4 +492,3 @@ do_PrefetchAbort(unsigned long addr, struct pt_regs *regs)
 {
 	do_translation_fault(addr, 0, regs);
 }
-
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 9df507d..412a10a 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -14,6 +14,7 @@
 #include <asm/cacheflush.h>
 #include <asm/system.h>
 #include <asm/tlbflush.h>
+#include <asm/fcse.h>
 
 #include "mm.h"
 
@@ -58,9 +59,11 @@ void flush_cache_mm(struct mm_struct *mm)
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
 	if (cache_is_vivt()) {
-		if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
-			__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
-						vma->vm_flags);
+		if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+			start = fcse_va_to_mva(vma->vm_mm, start) & PAGE_MASK;
+			end = PAGE_ALIGN(fcse_va_to_mva(vma->vm_mm, end));
+			__cpuc_flush_user_range(start, end, vma->vm_flags);
+		}
 		return;
 	}
 
@@ -78,7 +81,8 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsig
 {
 	if (cache_is_vivt()) {
 		if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
-			unsigned long addr = user_addr & PAGE_MASK;
+			unsigned long addr;
+			addr = fcse_va_to_mva(vma->vm_mm, user_addr) & PAGE_MASK;
 			__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
 		}
 		return;
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index e0f19ab..4da5e30 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -16,7 +16,11 @@
 
 #include "mm.h"
 
+#ifndef CONFIG_ARM_FCSE
 #define FIRST_KERNEL_PGD_NR	(FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
+#else /* CONFIG_ARM_FCSE */
+#define FIRST_KERNEL_PGD_NR	(MODULE_START / PGDIR_SIZE)
+#endif /* CONFIG_ARM_FCSE */
 
 /*
  * need to get a 16k page for level 1
@@ -26,6 +30,15 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	pgd_t *new_pgd, *init_pgd;
 	pmd_t *new_pmd, *init_pmd;
 	pte_t *new_pte, *init_pte;
+#ifdef CONFIG_ARM_FCSE
+	int pid;
+
+	pid = fcse_pid_alloc();
+	if (pid == -1)
+		goto no_pgd;
+
+	mm->context.pid = pid << FCSE_PID_SHIFT;
+#endif /* CONFIG_ARM_FCSE */
 
 	new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2);
 	if (!new_pgd)
@@ -43,11 +56,15 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 
 	if (!vectors_high()) {
+		/* We can not use pgd_offset here since mm->pgd is not yet
+		   initialized. */
+		pgd_t *pgd = new_pgd + pgd_index(fcse_va_to_mva(mm, 0));
+
 		/*
 		 * On ARM, first page must always be allocated since it
 		 * contains the machine vectors.
 		 */
-		new_pmd = pmd_alloc(mm, new_pgd, 0);
+		new_pmd = pmd_alloc(mm, pgd, 0);
 		if (!new_pmd)
 			goto no_pmd;
 
@@ -96,4 +113,7 @@ void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd)
 	pmd_free(mm, pmd);
 free:
 	free_pages((unsigned long) pgd, 2);
+#ifdef CONFIG_ARM_FCSE
+	fcse_pid_free(mm->context.pid >> FCSE_PID_SHIFT);
+#endif /* CONFIG_ARM_FCSE */
 }
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
index 28cdb06..2454d0b 100644
--- a/arch/arm/mm/proc-arm920.S
+++ b/arch/arm/mm/proc-arm920.S
@@ -321,6 +321,10 @@ ENTRY(cpu_arm920_dcache_clean_area)
 ENTRY(cpu_arm920_switch_mm)
 #ifdef CONFIG_MMU
 	mov	ip, #0
+#ifdef CONFIG_ARM_FCSE
+	cmp	r2, #0
+	beq	.LCnoflush
+#endif
 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
 #else
@@ -338,6 +342,9 @@ ENTRY(cpu_arm920_switch_mm)
 #endif
 	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_ARM_FCSE
+.LCnoflush:
+#endif
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
 #endif
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
index 4cd3316..ca7f4ee 100644
--- a/arch/arm/mm/proc-arm926.S
+++ b/arch/arm/mm/proc-arm926.S
@@ -337,6 +337,10 @@ ENTRY(cpu_arm926_dcache_clean_area)
 ENTRY(cpu_arm926_switch_mm)
 #ifdef CONFIG_MMU
 	mov	ip, #0
+#ifdef CONFIG_ARM_FCSE
+	cmp	r2, #0
+	beq	.LCnoflush
+#endif
 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
 #else
@@ -346,6 +350,9 @@ ENTRY(cpu_arm926_switch_mm)
 #endif
 	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
 	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_ARM_FCSE
+.LCnoflush:
+#endif
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
 #endif
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 2dd8527..c101e5d 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -417,9 +417,16 @@ ENTRY(cpu_xscale_dcache_clean_area)
  */
 	.align	5
 ENTRY(cpu_xscale_switch_mm)
+#ifdef CONFIG_ARM_FCSE
+	cmp	r2, #0
+	beq	.LCnoflush
+#endif
 	clean_d_cache r1, r2
 	mcr	p15, 0, ip, c7, c5, 0		@ Invalidate I cache & BTB
 	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
+#ifdef CONFIG_ARM_FCSE
+.LCnoflush:
+#endif
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
 	cpwait_ret lr, ip
diff --git a/include/asm-arm/cacheflush.h b/include/asm-arm/cacheflush.h
index 759a97b..b4b3b08 100644
--- a/include/asm-arm/cacheflush.h
+++ b/include/asm-arm/cacheflush.h
@@ -15,6 +15,7 @@
 
 #include <asm/glue.h>
 #include <asm/shmparam.h>
+#include <asm/fcse.h>
 
 #define CACHE_COLOUR(vaddr)	((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
 
@@ -339,16 +340,20 @@ static inline void flush_cache_mm(struct mm_struct *mm)
 static inline void
 flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-	if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
+	if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+		start = fcse_va_to_mva(vma->vm_mm, start);
+		end = fcse_va_to_mva(vma->vm_mm, end);
 		__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
 					vma->vm_flags);
+	}
 }
 
 static inline void
 flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
 {
 	if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
-		unsigned long addr = user_addr & PAGE_MASK;
+		unsigned long addr;
+		addr = fcse_va_to_mva(vma->vm_mm, user_addr) & PAGE_MASK;
 		__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
 	}
 }
@@ -379,8 +384,14 @@ extern void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
  * Harvard caches are synchronised for the user space address range.
  * This is used for the ARM private sys_cacheflush system call.
  */
-#define flush_cache_user_range(vma,start,end) \
-	__cpuc_coherent_user_range((start) & PAGE_MASK, PAGE_ALIGN(end))
+#define flush_cache_user_range(vma,start,end)			\
+	({							\
+		struct mm_struct *_mm = (vma)->vm_mm;		\
+		unsigned long _start, _end;			\
+		_start = fcse_va_to_mva(_mm, start) & PAGE_MASK;	\
+		_end = PAGE_ALIGN(fcse_va_to_mva(_mm, end));	\
+		__cpuc_coherent_user_range(_start, _end);	\
+	})
 
 /*
  * Perform necessary cache operations to ensure that data previously
@@ -417,7 +428,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 	extern void __flush_anon_page(struct vm_area_struct *vma,
 				struct page *, unsigned long);
 	if (PageAnon(page))
-		__flush_anon_page(vma, page, vmaddr);
+		__flush_anon_page(vma, page, fcse_va_to_mva(vma->vm_mm, vmaddr));
 }
 
 #define flush_dcache_mmap_lock(mapping) \
diff --git a/include/asm-arm/cpu-multi32.h b/include/asm-arm/cpu-multi32.h
index 3479de9..627daf3 100644
--- a/include/asm-arm/cpu-multi32.h
+++ b/include/asm-arm/cpu-multi32.h
@@ -52,7 +52,7 @@ extern struct processor {
 	/*
 	 * Set the page table
 	 */
-	void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm);
+	void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm, unsigned cacheflush);
 	/*
 	 * Set a possibly extended PTE.  Non-extended PTEs should
 	 * ignore 'ext'.
@@ -66,4 +66,4 @@ extern struct processor {
 #define cpu_do_idle()			processor._do_idle()
 #define cpu_dcache_clean_area(addr,sz)	processor.dcache_clean_area(addr,sz)
 #define cpu_set_pte_ext(ptep,pte,ext)	processor.set_pte_ext(ptep,pte,ext)
-#define cpu_do_switch_mm(pgd,mm)	processor.switch_mm(pgd,mm)
+#define cpu_do_switch_mm(pgd,mm,flush)	processor.switch_mm(pgd,mm,flush)
diff --git a/include/asm-arm/cpu-single.h b/include/asm-arm/cpu-single.h
index 0b120ee..e3a59f7 100644
--- a/include/asm-arm/cpu-single.h
+++ b/include/asm-arm/cpu-single.h
@@ -39,6 +39,6 @@ extern void cpu_proc_init(void);
 extern void cpu_proc_fin(void);
 extern int cpu_do_idle(void);
 extern void cpu_dcache_clean_area(void *, int);
-extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm);
+extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm, unsigned cacheflush);
 extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext);
 extern void cpu_reset(unsigned long addr) __attribute__((noreturn));
diff --git a/include/asm-arm/fcse.h b/include/asm-arm/fcse.h
new file mode 100644
index 0000000..c9ee051
--- /dev/null
+++ b/include/asm-arm/fcse.h
@@ -0,0 +1,76 @@
+/*
+ * Filename:    include/asm-arm/fcse.h
+ * Description: ARM Process ID (PID) includes for Fast Address Space Switching
+ *              (FASS) in ARM Linux.
+ * Created:     14/10/2001
+ * Changes:     19/02/2002 - Macros added.
+ *              03/08/2007 - Adapted to kernel 2.6.21 (ssm)
+ *              Feb 2008   - Simplified a bit (rco)
+ *
+ * Copyright:   (C) 2001, 2002 Adam Wiggins <[EMAIL PROTECTED]>
+ *              (C) 2007 Sebastian Smolorz <[EMAIL PROTECTED]>
+ *              (C) 2008 Richard Cochran
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_ARM_FCSE_H
+#define __ASM_ARM_FCSE_H
+
+#ifdef CONFIG_ARM_FCSE
+
+#define FCSE_PID_SHIFT 25
+
+/* Size of PID relocation area */
+#define FCSE_PID_TASK_SIZE (1UL << FCSE_PID_SHIFT)
+
+/* Mask to get rid of PID from relocated address */
+#define FCSE_PID_MASK (FCSE_PID_TASK_SIZE - 1)
+
+#define fcse_tlb_mask(mm) ((mm)->context.cpu_tlb_mask)
+#define fcse_cpu_set_vm_mask(cpu, mm) cpu_set(cpu, (mm)->cpu_vm_mask)
+#define fcse_needs_flush(mm) ((mm)->context.mappings_needing_flush)
+
+/* Sets the CPU's PID Register */
+static inline void fcse_pid_set(unsigned long pid)
+{
+	__asm__ __volatile__("mcr p15, 0, %0, c13, c0, 0" : : "r" (pid));
+}
+
+/* Returns the state of the CPU's PID Register */
+static inline unsigned long fcse_pid_get(void)
+{
+	unsigned long pid;
+	__asm__ __volatile__("mrc p15, 0, %0, c13, c0, 0" : "=&r" (pid));
+	return pid & ~FCSE_PID_MASK;
+}
+
+static inline unsigned long fcse_mva_to_va(unsigned long mva)
+{
+	unsigned long pid = fcse_pid_get();
+	if (pid && pid == (mva & ~FCSE_PID_MASK))
+		return mva & FCSE_PID_MASK;
+	return mva;
+}
+
+static inline unsigned long fcse_va_to_mva(struct mm_struct *mm, unsigned long va)
+{
+	if (va < FCSE_PID_TASK_SIZE)
+		return mm->context.pid | va;
+	return va;
+}
+
+int fcse_pid_alloc(void);
+void fcse_pid_free(unsigned pid);
+
+#else /* CONFIG_ARM_FCSE */
+#define fcse_pid_set(pid) do { } while (0)
+#define fcse_mva_to_va(x) (x)
+#define fcse_va_to_mva(mm,x) (x)
+#define fcse_tlb_mask(mm) ((mm)->cpu_vm_mask)
+#define fcse_cpu_set_vm_mask(cpu, mm) do { } while (0)
+#define fcse_needs_flush(mm) (1)
+#endif /* CONFIG_ARM_FCSE */
+
+#endif /* __ASM_ARM_FCSE_H */
diff --git a/include/asm-arm/memory.h b/include/asm-arm/memory.h
index 9ba4d71..bda4e74 100644
--- a/include/asm-arm/memory.h
+++ b/include/asm-arm/memory.h
@@ -34,14 +34,23 @@
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
  */
+#ifdef CONFIG_ARM_FCSE
+#define TASK_SIZE		UL(0x02000000)
+#define TASK_UNMAPPED_BASE	UL(0x01000000)
+#else
 #define TASK_SIZE		UL(0xbf000000)
 #define TASK_UNMAPPED_BASE	UL(0x40000000)
 #endif
+#endif
 
 /*
  * The maximum size of a 26-bit user space task.
  */
+#ifdef CONFIG_ARM_FCSE
+#define TASK_SIZE_26		UL(0x02000000)
+#else
 #define TASK_SIZE_26		UL(0x04000000)
+#endif
 
 /*
  * Page offset: 3GB
diff --git a/include/asm-arm/mmu.h b/include/asm-arm/mmu.h
index 53099d4..b74d736 100644
--- a/include/asm-arm/mmu.h
+++ b/include/asm-arm/mmu.h
@@ -7,6 +7,11 @@ typedef struct {
 #ifdef CONFIG_CPU_HAS_ASID
 	unsigned int id;
 #endif
+#ifdef CONFIG_ARM_FCSE
+	unsigned long pid;
+	unsigned mappings_needing_flush;
+	cpumask_t cpu_tlb_mask;
+#endif
 	unsigned int kvm_seq;
 } mm_context_t;
 
diff --git a/include/asm-arm/mmu_context.h b/include/asm-arm/mmu_context.h
index 6913d02..47339c2 100644
--- a/include/asm-arm/mmu_context.h
+++ b/include/asm-arm/mmu_context.h
@@ -17,6 +17,7 @@
 #include <asm/cacheflush.h>
 #include <asm/proc-fns.h>
 #include <asm-generic/mm_hooks.h>
+#include <asm/fcse.h>
 
 void __check_kvm_seq(struct mm_struct *mm);
 
@@ -64,7 +65,14 @@ static inline void check_context(struct mm_struct *mm)
 		__check_kvm_seq(mm);
 }
 
-#define init_new_context(tsk,mm)	0
+static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+#ifdef CONFIG_ARM_FCSE
+	cpus_clear(mm->context.cpu_tlb_mask);
+	mm->context.mappings_needing_flush = 0;
+#endif /* CONFIG_ARM_FCSE */
+	return 0;
+}
 
 #endif
 
@@ -97,11 +106,13 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #ifdef CONFIG_MMU
 	unsigned int cpu = smp_processor_id();
 
-	if (!cpu_test_and_set(cpu, next->cpu_vm_mask) || prev != next) {
+	if (!cpu_test_and_set(cpu, fcse_tlb_mask(next)) || prev != next) {
+		fcse_cpu_set_vm_mask(cpu, next);
 		check_context(next);
-		cpu_switch_mm(next->pgd, next);
+		fcse_pid_set(next->context.pid);
+		cpu_switch_mm(next->pgd, next, fcse_needs_flush(next));
 		if (cache_is_vivt())
-			cpu_clear(cpu, prev->cpu_vm_mask);
+			cpu_clear(cpu, fcse_tlb_mask(prev));
 	}
 #endif
 }
diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h
index 5571c13..701f458 100644
--- a/include/asm-arm/pgtable.h
+++ b/include/asm-arm/pgtable.h
@@ -344,10 +344,14 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 /* to find an entry in a page-table-directory */
 #define pgd_index(addr)		((addr) >> PGDIR_SHIFT)
 
-#define pgd_offset(mm, addr)	((mm)->pgd+pgd_index(addr))
+#define pgd_offset(mm, addr)						\
+	({								\
+		struct mm_struct *_mm = (mm);				\
+		(_mm->pgd + pgd_index(fcse_va_to_mva(_mm, (addr))));	\
+	})
 
 /* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)
+#define pgd_offset_k(addr)	(init_mm.pgd+pgd_index(addr))
 
 /* Find an entry in the second-level page table.. */
 #define pmd_offset(dir, addr)	((pmd_t *)(dir))
diff --git a/include/asm-arm/proc-fns.h b/include/asm-arm/proc-fns.h
index 75ec760..37ba564 100644
--- a/include/asm-arm/proc-fns.h
+++ b/include/asm-arm/proc-fns.h
@@ -223,7 +223,8 @@
 
 #ifdef CONFIG_MMU
 
-#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm)
+#define cpu_switch_mm(pgd,mm,cacheflush) \
+	cpu_do_switch_mm(virt_to_phys(pgd),mm,(cacheflush))
 
 #define cpu_get_pgd()	\
 	({						\
diff --git a/include/asm-arm/tlbflush.h b/include/asm-arm/tlbflush.h
index 8c6bc1b..98cd28f 100644
--- a/include/asm-arm/tlbflush.h
+++ b/include/asm-arm/tlbflush.h
@@ -158,6 +158,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/sched.h>
+#include <asm/fcse.h>
 
 struct cpu_tlb_fns {
 	void (*flush_user_range)(unsigned long, unsigned long, struct vm_area_struct *);
@@ -292,7 +293,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
 	if (tlb_flag(TLB_WB))
 		dsb();
 
-	if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) {
+	if (cpu_isset(smp_processor_id(), fcse_tlb_mask(mm))) {
 		if (tlb_flag(TLB_V3_FULL))
 			asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc");
 		if (tlb_flag(TLB_V4_U_FULL))
@@ -325,12 +326,13 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 	const int zero = 0;
 	const unsigned int __tlb_flag = __cpu_tlb_flags;
 
-	uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
+	uaddr = (fcse_va_to_mva(vma->vm_mm, uaddr) & PAGE_MASK)
+		| ASID(vma->vm_mm);
 
 	if (tlb_flag(TLB_WB))
 		dsb();
 
-	if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+	if (cpu_isset(smp_processor_id(), fcse_tlb_mask(vma->vm_mm))) {
 		if (tlb_flag(TLB_V3_PAGE))
 			asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (uaddr) : "cc");
 		if (tlb_flag(TLB_V4_U_PAGE))
@@ -437,7 +439,15 @@ static inline void clean_pmd_entry(pmd_t *pmd)
 /*
  * Convert calls to our calling convention.
  */
-#define local_flush_tlb_range(vma,start,end)	__cpu_flush_user_tlb_range(start,end,vma)
+#define local_flush_tlb_range(vma,start,end)			\
+	({							\
+		struct mm_struct *_mm = (vma)->vm_mm;		\
+		unsigned long _start, _end;			\
+		_start = fcse_va_to_mva(_mm, start);		\
+		_end = fcse_va_to_mva(_mm, end);		\
+		__cpu_flush_user_tlb_range(_start, _end, vma);	\
+	})
+
 #define local_flush_tlb_kernel_range(s,e)	__cpu_flush_kern_tlb_range(s,e)
 
 #ifndef CONFIG_SMP