On Tue, 2011-11-15 at 10:22 -0500, Kyle Moffett wrote: > [My apologies for the resend, it does not seem to have hit the MLs. > I think my git send-email "cc-cmd" may have broken somehow, oops.]
Or the ML took a while because it's big :-) I got both. I'll try to review this week. Probably won't get to it today though. Thanks for looking at this! Cheers, Ben. > This badly needs breaking up, and a better changelog... oh well... > > The big changes: > > * The "ppc64_caches" structure is now "powerpc_caches" and is used on > both PPC32 and PPC64. I hated staring at the pages and pages of > assembly code, so nearly all of the functions are now C with tiny > snippets of inline ASM in the loops. > > * Lots of ugly assembly functions in arch/powerpc/kernel/misc_*.S were > rewritten as cleaner inline ASM in arch/powerpc/mm/cache.c > > * I'm not sure that the physical address functions from those files > actually came out cleaner, but they are now more correct. > > * I'm not 100% sure I like the new FOR_EACH_CACHE_LINE() macro, but it > sure does make a lot of the other code much cleaner. > > * I have a bit of a temptation to try to merge the 32/64-bit variants > of copy_page() into a single C function. A quick test seems to show > that I can get nearly identical output to the 64-bit ASM with very > little work. 
> > > --- > arch/powerpc/include/asm/cache.h | 155 ++++++++++++--- > arch/powerpc/include/asm/cacheflush.h | 3 - > arch/powerpc/include/asm/page.h | 6 + > arch/powerpc/include/asm/page_32.h | 4 +- > arch/powerpc/include/asm/page_64.h | 17 -- > arch/powerpc/kernel/align.c | 7 +- > arch/powerpc/kernel/asm-offsets.c | 13 +- > arch/powerpc/kernel/head_32.S | 9 +- > arch/powerpc/kernel/head_64.S | 2 +- > arch/powerpc/kernel/misc_32.S | 193 ------------------ > arch/powerpc/kernel/misc_64.S | 182 ----------------- > arch/powerpc/kernel/ppc_ksyms.c | 3 - > arch/powerpc/kernel/setup-common.c | 103 ++++++++++ > arch/powerpc/kernel/setup.h | 1 + > arch/powerpc/kernel/setup_32.c | 11 +- > arch/powerpc/kernel/setup_64.c | 118 +---------- > arch/powerpc/kernel/vdso.c | 27 +-- > arch/powerpc/lib/copypage_64.S | 10 +- > arch/powerpc/mm/Makefile | 2 +- > arch/powerpc/mm/cache.c | 279 > ++++++++++++++++++++++++++ > arch/powerpc/mm/dma-noncoherent.c | 2 +- > arch/powerpc/platforms/52xx/lite5200_sleep.S | 9 +- > arch/powerpc/platforms/powermac/pci.c | 2 +- > arch/powerpc/xmon/xmon.c | 53 +++--- > drivers/macintosh/smu.c | 8 +- > 25 files changed, 599 insertions(+), 620 deletions(-) > create mode 100644 arch/powerpc/mm/cache.c > > diff --git a/arch/powerpc/include/asm/cache.h > b/arch/powerpc/include/asm/cache.h > index 4b50941..b1dc08f 100644 > --- a/arch/powerpc/include/asm/cache.h > +++ b/arch/powerpc/include/asm/cache.h > @@ -3,47 +3,142 @@ > > #ifdef __KERNEL__ > > - > -/* bytes per L1 cache line */ > -#if defined(CONFIG_8xx) || defined(CONFIG_403GCX) > -#define L1_CACHE_SHIFT 4 > -#define MAX_COPY_PREFETCH 1 > +/* > + * Various PowerPC CPUs which are otherwise compatible have different L1 > + * cache line sizes. 
> + * > + * Unfortunately, lots of kernel code assumes that L1_CACHE_BYTES and > + * L1_CACHE_SHIFT are compile-time constants that can be used to align > + * data-structures to avoid false cacheline sharing, so we can't just > + * compute them at runtime from the cputable values. > + * > + * So for alignment purposes, we will compute these values as safe maximums > + * of all the CPU support compiled into the kernel. > + */ > +#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_47x) > +# define L1_CACHE_SHIFT_MAX 7 /* 128-byte cache blocks */ > #elif defined(CONFIG_PPC_E500MC) > -#define L1_CACHE_SHIFT 6 > -#define MAX_COPY_PREFETCH 4 > -#elif defined(CONFIG_PPC32) > -#define MAX_COPY_PREFETCH 4 > -#if defined(CONFIG_PPC_47x) > -#define L1_CACHE_SHIFT 7 > +# define L1_CACHE_SHIFT_MAX 6 /* 64-byte cache blocks */ > #else > -#define L1_CACHE_SHIFT 5 > +# define L1_CACHE_SHIFT_MAX 5 /* 32-byte cache blocks */ > #endif > +#define L1_CACHE_BYTES_MAX (1 << L1_CACHE_SHIFT_MAX) > + > +#define L1_CACHE_SHIFT L1_CACHE_SHIFT_MAX > +#define L1_CACHE_BYTES L1_CACHE_BYTES_MAX > +#define SMP_CACHE_BYTES L1_CACHE_BYTES_MAX > + > +/* > + * Unfortunately, for other purposes, we can't just use a safe maximum value > + * because it gets used in loops when invalidating or clearing cachelines and > + * it would be very bad to only flush/invalidate/zero/etc every 4th one. > + * > + * During early initialization we load these values from the device-tree and > + * the cputable into the powerpc_caches structure, but we need to be able to > + * clear pages before that occurs, so these need sane default values. > + * > + * As explained in the powerpc_caches structure definition, the defaults > + * should be safe minimums, so that's what we compute here. 
> + */ > +#if defined(CONFIG_8xx) || defined(CONFIG_403GCX) > +# define L1_CACHE_SHIFT_MIN 4 /* 16-byte cache blocks */ > +#elif defined(CONFIG_PPC32) > +# define L1_CACHE_SHIFT_MIN 5 /* 32-byte cache blocks */ > #else /* CONFIG_PPC64 */ > -#define L1_CACHE_SHIFT 7 > +# define L1_CACHE_SHIFT_MIN 6 /* 64-byte cache blocks */ > #endif > +#define L1_CACHE_BYTES_MIN (1 << L1_CACHE_SHIFT_MIN) > > -#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) > +/* > + * Apparently the 8xx and the 403GCX have tiny caches, so they never prefetch > + * more than a single cacheline in the ASM memory copy functions. > + * > + * All other 32-bit CPUs prefetch 4 cachelines, and the 64-bit CPUs have > + * their own copy routines which prefetch the entire page. > + */ > +#ifdef PPC32 > +# if defined(CONFIG_8xx) || defined(CONFIG_403GCX) > +# define MAX_COPY_PREFETCH 1 > +# else > +# define MAX_COPY_PREFETCH 4 > +# endif > +#endif > > -#define SMP_CACHE_BYTES L1_CACHE_BYTES > +#ifndef __ASSEMBLY__ > > -#if defined(__powerpc64__) && !defined(__ASSEMBLY__) > -struct ppc64_caches { > - u32 dsize; /* L1 d-cache size */ > - u32 dline_size; /* L1 d-cache line size */ > - u32 log_dline_size; > - u32 dlines_per_page; > - u32 isize; /* L1 i-cache size */ > - u32 iline_size; /* L1 i-cache line size */ > - u32 log_iline_size; > - u32 ilines_per_page; > -}; > +/* > + * A handy macro to iterate over all the cachelines referring to memory from > + * "START" through "STOP - 1", inclusive. > + */ > +#define FOR_EACH_CACHELINE(LINE, START, STOP, CACHE) \ > + for (u32 linesize__ = powerpc_caches.CACHE##_block_bytes, \ > + (LINE) = (START) & ~(linesize__ - 1); \ > + (LINE) < (STOP); (LINE) += linesize__) > + > +/* Write out a data cache block if it is dirty */ > +static inline void dcbst(unsigned long addr) > +{ > + asm volatile("dcbst %y0" :: "Z"(addr) : "memory"); > +} > > -extern struct ppc64_caches ppc64_caches; > -#endif /* __powerpc64__ && ! 
__ASSEMBLY__ */ > +/* Invalidate a data cache block (will lose data if dirty!) */ > +static inline void dcbi(unsigned long addr) > +{ > + asm volatile("dcbi %y0" :: "Z"(addr) : "memory"); > +} > + > +/* Write out (if dirty) and invalidate a data cache block */ > +static inline void dcbf(unsigned long addr) > +{ > + asm volatile("dcbf %y0" :: "Z"(addr) : "memory"); > +} > + > +/* Populate a data cache block with zeros */ > +static inline void dcbz(unsigned long addr) > +{ > + asm volatile("dcbz %y0" :: "Z"(addr) : "memory"); > +} > + > +/* Invalidate an instruction cache block */ > +static inline void icbi(unsigned long addr) > +{ > + asm volatile("icbi %y0" :: "Z"(addr) : "memory"); > +} > + > +/* > + * This structure contains the various PowerPC cache parameters computed > + * shortly after the device-tree has been unflattened during boot. > + * > + * Prior to that they have statically initialized values from L1_CACHE_*_MIN > + * computed above. > + * > + * NOTE: If the dcache/icache are separate then ucache_* should be zeroed, > + * otherwise dcache == icache == ucache. 
> + */ > +struct powerpc_caches { > + /* Data cache parameters */ > + u32 dcache_total_bytes; > + u32 dcache_block_bytes; > + u32 dcache_block_shift; > + u32 dcache_blocks_per_page; > + > + /* Instruction cache parameters */ > + u32 icache_total_bytes; > + u32 icache_block_bytes; > + u32 icache_block_shift; > + u32 icache_blocks_per_page; > + > + /* Unified cache parameters (If != 0, all 3 caches must be equal) */ > + u32 ucache_total_bytes; > + u32 ucache_block_bytes; > + u32 ucache_block_shift; > + u32 ucache_blocks_per_page; > +}; > +extern struct powerpc_caches powerpc_caches; > > -#if !defined(__ASSEMBLY__) > #define __read_mostly __attribute__((__section__(".data..read_mostly"))) > -#endif > + > +#endif /* not __ASSEMBLY__ */ > > #endif /* __KERNEL__ */ > #endif /* _ASM_POWERPC_CACHE_H */ > diff --git a/arch/powerpc/include/asm/cacheflush.h > b/arch/powerpc/include/asm/cacheflush.h > index ab9e402..8646443 100644 > --- a/arch/powerpc/include/asm/cacheflush.h > +++ b/arch/powerpc/include/asm/cacheflush.h > @@ -47,12 +47,9 @@ extern void __flush_dcache_icache_phys(unsigned long > physaddr); > #endif /* CONFIG_PPC32 && !CONFIG_BOOKE */ > > extern void flush_dcache_range(unsigned long start, unsigned long stop); > -#ifdef CONFIG_PPC32 > extern void clean_dcache_range(unsigned long start, unsigned long stop); > extern void invalidate_dcache_range(unsigned long start, unsigned long stop); > -#endif /* CONFIG_PPC32 */ > #ifdef CONFIG_PPC64 > -extern void flush_inval_dcache_range(unsigned long start, unsigned long > stop); > extern void flush_dcache_phys_range(unsigned long start, unsigned long stop); > #endif > > diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h > index dd9c4fd..b2e24ce 100644 > --- a/arch/powerpc/include/asm/page.h > +++ b/arch/powerpc/include/asm/page.h > @@ -286,11 +286,17 @@ static inline int hugepd_ok(hugepd_t hpd) > #endif /* CONFIG_HUGETLB_PAGE */ > > struct page; > +extern void clear_pages(void *page, int order); 
> extern void clear_user_page(void *page, unsigned long vaddr, struct page > *pg); > extern void copy_user_page(void *to, void *from, unsigned long vaddr, > struct page *p); > extern int page_is_ram(unsigned long pfn); > > +static inline void clear_page(void *page) > +{ > + clear_pages(page, 0); > +} > + > #ifdef CONFIG_PPC_SMLPAR > void arch_free_page(struct page *page, int order); > #define HAVE_ARCH_FREE_PAGE > diff --git a/arch/powerpc/include/asm/page_32.h > b/arch/powerpc/include/asm/page_32.h > index 68d73b2..12ae694 100644 > --- a/arch/powerpc/include/asm/page_32.h > +++ b/arch/powerpc/include/asm/page_32.h > @@ -10,7 +10,7 @@ > #define VM_DATA_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS32 > > #ifdef CONFIG_NOT_COHERENT_CACHE > -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES > +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES_MAX > #endif > > #ifdef CONFIG_PTE_64BIT > @@ -37,8 +37,6 @@ typedef unsigned long pte_basic_t; > #endif > > struct page; > -extern void clear_pages(void *page, int order); > -static inline void clear_page(void *page) { clear_pages(page, 0); } > extern void copy_page(void *to, void *from); > > #include <asm-generic/getorder.h> > diff --git a/arch/powerpc/include/asm/page_64.h > b/arch/powerpc/include/asm/page_64.h > index fb40ede..7e156f6 100644 > --- a/arch/powerpc/include/asm/page_64.h > +++ b/arch/powerpc/include/asm/page_64.h > @@ -42,23 +42,6 @@ > > typedef unsigned long pte_basic_t; > > -static __inline__ void clear_page(void *addr) > -{ > - unsigned long lines, line_size; > - > - line_size = ppc64_caches.dline_size; > - lines = ppc64_caches.dlines_per_page; > - > - __asm__ __volatile__( > - "mtctr %1 # clear_page\n\ > -1: dcbz 0,%0\n\ > - add %0,%0,%3\n\ > - bdnz+ 1b" > - : "=r" (addr) > - : "r" (lines), "0" (addr), "r" (line_size) > - : "ctr", "memory"); > -} > - > extern void copy_page(void *to, void *from); > > /* Log 2 of page table size */ > diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c > index 8184ee9..debfb99 100644 > 
--- a/arch/powerpc/kernel/align.c > +++ b/arch/powerpc/kernel/align.c > @@ -233,14 +233,9 @@ static inline unsigned make_dsisr(unsigned instr) > */ > static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr) > { > + int i, size = powerpc_caches.dcache_block_bytes; > long __user *p; > - int i, size; > > -#ifdef __powerpc64__ > - size = ppc64_caches.dline_size; > -#else > - size = L1_CACHE_BYTES; > -#endif > p = (long __user *) (regs->dar & -size); > if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size)) > return -EFAULT; > diff --git a/arch/powerpc/kernel/asm-offsets.c > b/arch/powerpc/kernel/asm-offsets.c > index 7c5324f..505b25a 100644 > --- a/arch/powerpc/kernel/asm-offsets.c > +++ b/arch/powerpc/kernel/asm-offsets.c > @@ -126,13 +126,14 @@ int main(void) > DEFINE(TI_TASK, offsetof(struct thread_info, task)); > DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); > > + DEFINE(DCACHE_BLOCK_SHIFT, offsetof(struct powerpc_caches, > dcache_block_shift)); > + DEFINE(DCACHE_BLOCK_BYTES, offsetof(struct powerpc_caches, > dcache_block_bytes)); > + DEFINE(DCACHE_BLOCKS_PER_PAGE, offsetof(struct powerpc_caches, > dcache_blocks_per_page)); > + DEFINE(ICACHE_BLOCK_SHIFT, offsetof(struct powerpc_caches, > icache_block_shift)); > + DEFINE(ICACHE_BLOCK_BYTES, offsetof(struct powerpc_caches, > icache_block_bytes)); > + DEFINE(ICACHE_BLOCKS_PER_PAGE, offsetof(struct powerpc_caches, > icache_blocks_per_page)); > + > #ifdef CONFIG_PPC64 > - DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); > - DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, > log_dline_size)); > - DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, > dlines_per_page)); > - DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); > - DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, > log_iline_size)); > - DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, > ilines_per_page)); > /* paca */ > DEFINE(PACA_SIZE, sizeof(struct 
paca_struct)); > DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token)); > diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S > index 0654dba..8abc44a 100644 > --- a/arch/powerpc/kernel/head_32.S > +++ b/arch/powerpc/kernel/head_32.S > @@ -786,7 +786,14 @@ relocate_kernel: > _ENTRY(copy_and_flush) > addi r5,r5,-4 > addi r6,r6,-4 > -4: li r0,L1_CACHE_BYTES/4 > +4: li r0,L1_CACHE_BYTES_MIN/4 /* Use the smallest common */ > + /* denominator cache line */ > + /* size. This results in */ > + /* extra cache line flushes */ > + /* but operation is correct. */ > + /* Can't get cache line size */ > + /* from device-tree yet */ > + > mtctr r0 > 3: addi r6,r6,4 /* copy a cache line */ > lwzx r0,r6,r4 > diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S > index 06c7251..183d371 100644 > --- a/arch/powerpc/kernel/head_64.S > +++ b/arch/powerpc/kernel/head_64.S > @@ -480,7 +480,7 @@ p_end: .llong _end - _stext > _GLOBAL(copy_and_flush) > addi r5,r5,-8 > addi r6,r6,-8 > -4: li r0,8 /* Use the smallest common */ > +4: li r0,L1_CACHE_BYTES_MIN/8 /* Use the smallest common */ > /* denominator cache line */ > /* size. This results in */ > /* extra cache line flushes */ > diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S > index f7d760a..ee61600 100644 > --- a/arch/powerpc/kernel/misc_32.S > +++ b/arch/powerpc/kernel/misc_32.S > @@ -321,199 +321,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) > blr > > /* > - * Write any modified data cache blocks out to memory > - * and invalidate the corresponding instruction cache blocks. > - * This is a no-op on the 601. > - * > - * flush_icache_range(unsigned long start, unsigned long stop) > - */ > -_KPROBE(__flush_icache_range) > -BEGIN_FTR_SECTION > - blr /* for 601, do nothing */ > -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) > - li r5,L1_CACHE_BYTES-1 > - andc r3,r3,r5 > - subf r4,r3,r4 > - add r4,r4,r5 > - srwi. 
r4,r4,L1_CACHE_SHIFT > - beqlr > - mtctr r4 > - mr r6,r3 > -1: dcbst 0,r3 > - addi r3,r3,L1_CACHE_BYTES > - bdnz 1b > - sync /* wait for dcbst's to get to ram */ > -#ifndef CONFIG_44x > - mtctr r4 > -2: icbi 0,r6 > - addi r6,r6,L1_CACHE_BYTES > - bdnz 2b > -#else > - /* Flash invalidate on 44x because we are passed kmapped addresses and > - this doesn't work for userspace pages due to the virtually tagged > - icache. Sigh. */ > - iccci 0, r0 > -#endif > - sync /* additional sync needed on g4 */ > - isync > - blr > -/* > - * Write any modified data cache blocks out to memory. > - * Does not invalidate the corresponding cache lines (especially for > - * any corresponding instruction cache). > - * > - * clean_dcache_range(unsigned long start, unsigned long stop) > - */ > -_GLOBAL(clean_dcache_range) > - li r5,L1_CACHE_BYTES-1 > - andc r3,r3,r5 > - subf r4,r3,r4 > - add r4,r4,r5 > - srwi. r4,r4,L1_CACHE_SHIFT > - beqlr > - mtctr r4 > - > -1: dcbst 0,r3 > - addi r3,r3,L1_CACHE_BYTES > - bdnz 1b > - sync /* wait for dcbst's to get to ram */ > - blr > - > -/* > - * Write any modified data cache blocks out to memory and invalidate them. > - * Does not invalidate the corresponding instruction cache blocks. > - * > - * flush_dcache_range(unsigned long start, unsigned long stop) > - */ > -_GLOBAL(flush_dcache_range) > - li r5,L1_CACHE_BYTES-1 > - andc r3,r3,r5 > - subf r4,r3,r4 > - add r4,r4,r5 > - srwi. r4,r4,L1_CACHE_SHIFT > - beqlr > - mtctr r4 > - > -1: dcbf 0,r3 > - addi r3,r3,L1_CACHE_BYTES > - bdnz 1b > - sync /* wait for dcbst's to get to ram */ > - blr > - > -/* > - * Like above, but invalidate the D-cache. This is used by the 8xx > - * to invalidate the cache so the PPC core doesn't get stale data > - * from the CPM (no cache snooping here :-). > - * > - * invalidate_dcache_range(unsigned long start, unsigned long stop) > - */ > -_GLOBAL(invalidate_dcache_range) > - li r5,L1_CACHE_BYTES-1 > - andc r3,r3,r5 > - subf r4,r3,r4 > - add r4,r4,r5 > - srwi. 
r4,r4,L1_CACHE_SHIFT > - beqlr > - mtctr r4 > - > -1: dcbi 0,r3 > - addi r3,r3,L1_CACHE_BYTES > - bdnz 1b > - sync /* wait for dcbi's to get to ram */ > - blr > - > -/* > - * Flush a particular page from the data cache to RAM. > - * Note: this is necessary because the instruction cache does *not* > - * snoop from the data cache. > - * This is a no-op on the 601 which has a unified cache. > - * > - * void __flush_dcache_icache(void *page) > - */ > -_GLOBAL(__flush_dcache_icache) > -BEGIN_FTR_SECTION > - blr > -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) > - rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ > - li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ > - mtctr r4 > - mr r6,r3 > -0: dcbst 0,r3 /* Write line to ram */ > - addi r3,r3,L1_CACHE_BYTES > - bdnz 0b > - sync > -#ifdef CONFIG_44x > - /* We don't flush the icache on 44x. Those have a virtual icache > - * and we don't have access to the virtual address here (it's > - * not the page vaddr but where it's mapped in user space). The > - * flushing of the icache on these is handled elsewhere, when > - * a change in the address space occurs, before returning to > - * user space > - */ > -BEGIN_MMU_FTR_SECTION > - blr > -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x) > -#endif /* CONFIG_44x */ > - mtctr r4 > -1: icbi 0,r6 > - addi r6,r6,L1_CACHE_BYTES > - bdnz 1b > - sync > - isync > - blr > - > -#ifndef CONFIG_BOOKE > -/* > - * Flush a particular page from the data cache to RAM, identified > - * by its physical address. We turn off the MMU so we can just use > - * the physical address (this may be a highmem page without a kernel > - * mapping). 
> - * > - * void __flush_dcache_icache_phys(unsigned long physaddr) > - */ > -_GLOBAL(__flush_dcache_icache_phys) > -BEGIN_FTR_SECTION > - blr /* for 601, do nothing */ > -END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) > - mfmsr r10 > - rlwinm r0,r10,0,28,26 /* clear DR */ > - mtmsr r0 > - isync > - rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ > - li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */ > - mtctr r4 > - mr r6,r3 > -0: dcbst 0,r3 /* Write line to ram */ > - addi r3,r3,L1_CACHE_BYTES > - bdnz 0b > - sync > - mtctr r4 > -1: icbi 0,r6 > - addi r6,r6,L1_CACHE_BYTES > - bdnz 1b > - sync > - mtmsr r10 /* restore DR */ > - isync > - blr > -#endif /* CONFIG_BOOKE */ > - > -/* > - * Clear pages using the dcbz instruction, which doesn't cause any > - * memory traffic (except to write out any cache lines which get > - * displaced). This only works on cacheable memory. > - * > - * void clear_pages(void *page, int order) ; > - */ > -_GLOBAL(clear_pages) > - li r0,PAGE_SIZE/L1_CACHE_BYTES > - slw r0,r0,r4 > - mtctr r0 > -1: dcbz 0,r3 > - addi r3,r3,L1_CACHE_BYTES > - bdnz 1b > - blr > - > -/* > * Copy a whole page. We use the dcbz instruction on the destination > * to reduce memory traffic (it eliminates the unnecessary reads of > * the destination into cache). This requires that the destination > diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S > index 616921e..500fd61 100644 > --- a/arch/powerpc/kernel/misc_64.S > +++ b/arch/powerpc/kernel/misc_64.S > @@ -53,188 +53,6 @@ _GLOBAL(call_handle_irq) > mtlr r0 > blr > > - .section ".toc","aw" > -PPC64_CACHES: > - .tc ppc64_caches[TC],ppc64_caches > - .section ".text" > - > -/* > - * Write any modified data cache blocks out to memory > - * and invalidate the corresponding instruction cache blocks. 
> - * > - * flush_icache_range(unsigned long start, unsigned long stop) > - * > - * flush all bytes from start through stop-1 inclusive > - */ > - > -_KPROBE(__flush_icache_range) > - > -/* > - * Flush the data cache to memory > - * > - * Different systems have different cache line sizes > - * and in some cases i-cache and d-cache line sizes differ from > - * each other. > - */ > - ld r10,PPC64_CACHES@toc(r2) > - lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */ > - addi r5,r7,-1 > - andc r6,r3,r5 /* round low to line bdy */ > - subf r8,r6,r4 /* compute length */ > - add r8,r8,r5 /* ensure we get enough */ > - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size > */ > - srw. r8,r8,r9 /* compute line count */ > - beqlr /* nothing to do? */ > - mtctr r8 > -1: dcbst 0,r6 > - add r6,r6,r7 > - bdnz 1b > - sync > - > -/* Now invalidate the instruction cache */ > - > - lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */ > - addi r5,r7,-1 > - andc r6,r3,r5 /* round low to line bdy */ > - subf r8,r6,r4 /* compute length */ > - add r8,r8,r5 > - lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line > size */ > - srw. r8,r8,r9 /* compute line count */ > - beqlr /* nothing to do? */ > - mtctr r8 > -2: icbi 0,r6 > - add r6,r6,r7 > - bdnz 2b > - isync > - blr > - .previous .text > -/* > - * Like above, but only do the D-cache. > - * > - * flush_dcache_range(unsigned long start, unsigned long stop) > - * > - * flush all bytes from start to stop-1 inclusive > - */ > -_GLOBAL(flush_dcache_range) > - > -/* > - * Flush the data cache to memory > - * > - * Different systems have different cache line sizes > - */ > - ld r10,PPC64_CACHES@toc(r2) > - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ > - addi r5,r7,-1 > - andc r6,r3,r5 /* round low to line bdy */ > - subf r8,r6,r4 /* compute length */ > - add r8,r8,r5 /* ensure we get enough */ > - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line > size */ > - srw. 
r8,r8,r9 /* compute line count */ > - beqlr /* nothing to do? */ > - mtctr r8 > -0: dcbst 0,r6 > - add r6,r6,r7 > - bdnz 0b > - sync > - blr > - > -/* > - * Like above, but works on non-mapped physical addresses. > - * Use only for non-LPAR setups ! It also assumes real mode > - * is cacheable. Used for flushing out the DART before using > - * it as uncacheable memory > - * > - * flush_dcache_phys_range(unsigned long start, unsigned long stop) > - * > - * flush all bytes from start to stop-1 inclusive > - */ > -_GLOBAL(flush_dcache_phys_range) > - ld r10,PPC64_CACHES@toc(r2) > - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ > - addi r5,r7,-1 > - andc r6,r3,r5 /* round low to line bdy */ > - subf r8,r6,r4 /* compute length */ > - add r8,r8,r5 /* ensure we get enough */ > - lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line > size */ > - srw. r8,r8,r9 /* compute line count */ > - beqlr /* nothing to do? */ > - mfmsr r5 /* Disable MMU Data Relocation */ > - ori r0,r5,MSR_DR > - xori r0,r0,MSR_DR > - sync > - mtmsr r0 > - sync > - isync > - mtctr r8 > -0: dcbst 0,r6 > - add r6,r6,r7 > - bdnz 0b > - sync > - isync > - mtmsr r5 /* Re-enable MMU Data Relocation */ > - sync > - isync > - blr > - > -_GLOBAL(flush_inval_dcache_range) > - ld r10,PPC64_CACHES@toc(r2) > - lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */ > - addi r5,r7,-1 > - andc r6,r3,r5 /* round low to line bdy */ > - subf r8,r6,r4 /* compute length */ > - add r8,r8,r5 /* ensure we get enough */ > - lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */ > - srw. r8,r8,r9 /* compute line count */ > - beqlr /* nothing to do? */ > - sync > - isync > - mtctr r8 > -0: dcbf 0,r6 > - add r6,r6,r7 > - bdnz 0b > - sync > - isync > - blr > - > - > -/* > - * Flush a particular page from the data cache to RAM. > - * Note: this is necessary because the instruction cache does *not* > - * snoop from the data cache. 
> - * > - * void __flush_dcache_icache(void *page) > - */ > -_GLOBAL(__flush_dcache_icache) > -/* > - * Flush the data cache to memory > - * > - * Different systems have different cache line sizes > - */ > - > -/* Flush the dcache */ > - ld r7,PPC64_CACHES@toc(r2) > - clrrdi r3,r3,PAGE_SHIFT /* Page align */ > - lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page > */ > - lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */ > - mr r6,r3 > - mtctr r4 > -0: dcbst 0,r6 > - add r6,r6,r5 > - bdnz 0b > - sync > - > -/* Now invalidate the icache */ > - > - lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page > */ > - lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */ > - mtctr r4 > -1: icbi 0,r3 > - add r3,r3,r5 > - bdnz 1b > - isync > - blr > - > - > #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) > /* > * Do an IO access in real mode > diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c > index acba8ce..ccdceb7 100644 > --- a/arch/powerpc/kernel/ppc_ksyms.c > +++ b/arch/powerpc/kernel/ppc_ksyms.c > @@ -53,7 +53,6 @@ extern void program_check_exception(struct pt_regs *regs); > extern void single_step_exception(struct pt_regs *regs); > extern int sys_sigreturn(struct pt_regs *regs); > > -EXPORT_SYMBOL(clear_pages); > EXPORT_SYMBOL(ISA_DMA_THRESHOLD); > EXPORT_SYMBOL(DMA_MODE_READ); > EXPORT_SYMBOL(DMA_MODE_WRITE); > @@ -113,8 +112,6 @@ EXPORT_SYMBOL(giveup_spe); > #ifndef CONFIG_PPC64 > EXPORT_SYMBOL(flush_instruction_cache); > #endif > -EXPORT_SYMBOL(__flush_icache_range); > -EXPORT_SYMBOL(flush_dcache_range); > > #ifdef CONFIG_SMP > #ifdef CONFIG_PPC32 > diff --git a/arch/powerpc/kernel/setup-common.c > b/arch/powerpc/kernel/setup-common.c > index 77bb77d..3abfea4 100644 > --- a/arch/powerpc/kernel/setup-common.c > +++ b/arch/powerpc/kernel/setup-common.c > @@ -83,6 +83,54 @@ unsigned long klimit = (unsigned long) _end; > char cmd_line[COMMAND_LINE_SIZE]; > > /* > + * Initialize these values to minimum safe 
defaults in case they need to be > + * used early during the boot process. While this may not seem safe, it is > + * actually safe in practice, because all of the kernel loops that use this > + * data operate on whole pages. > + * > + * The PowerPC Book III-E spec documents that the pagesize is an even > + * multiple of the cache block size and the cache blocks are always > + * page-aligned. > + * > + * So, for example, when clearing a whole page there are only two things that > + * can be done wrong with "dcbz": > + * > + * (1) Call "dcbz" with an address outside the page you want to zero. > + * > + * (2) Call "dcbz" too few times to actually hit all of the cachelines, > + * IE: Use a too-large cacheline stride. > + * > + * So as long as we ensure that this number is small enough for the current > + * CPU everything will operate correctly, albeit with a slight performance > + * hit, until we get a chance to parse the device-tree for the right value. > + * > + * NOTE: Userspace expects an exact value, so none of the above applies after > + * the device tree has been unflattened and actual values computed. > + * > + * See arch/powerpc/asm/caches.h for more information. > + */ > +struct powerpc_caches powerpc_caches = { > + /* Data cache sizes */ > + .dcache_total_bytes = 0, /* Unknown */ > + .dcache_block_bytes = L1_CACHE_BYTES_MIN, > + .dcache_block_shift = L1_CACHE_SHIFT_MIN, > + .dcache_blocks_per_page = (PAGE_SIZE >> L1_CACHE_SHIFT_MIN), > + > + /* Instruction cache sizes */ > + .icache_total_bytes = 0, > + .icache_block_bytes = L1_CACHE_BYTES_MIN, > + .icache_block_shift = L1_CACHE_SHIFT_MIN, > + .icache_blocks_per_page = (PAGE_SIZE >> L1_CACHE_SHIFT_MIN), > + > + /* Unified cache (assume cache is split by default) */ > + .ucache_total_bytes = 0, > + .ucache_block_bytes = 0, > + .ucache_block_shift = 0, > + .ucache_blocks_per_page = 0, > +}; > +EXPORT_SYMBOL_GPL(powerpc_caches); > + > +/* > * This still seems to be needed... 
-- paulus > */ > struct screen_info screen_info = { > @@ -349,6 +397,61 @@ const struct seq_operations cpuinfo_op = { > .show = show_cpuinfo, > }; > > +/* Helper functions to compute various values from a cache block size */ > +static void __init set_dcache_block_data(u32 bytes) > +{ > + u32 shift = __ilog2(bytes); > + powerpc_caches.dcache_block_bytes = bytes; > + powerpc_caches.dcache_block_shift = shift; > + powerpc_caches.dcache_blocks_per_page = (PAGE_SIZE >> shift); > +} > +static void __init set_icache_block_data(u32 bytes) > +{ > + u32 shift = __ilog2(bytes); > + powerpc_caches.icache_block_bytes = bytes; > + powerpc_caches.icache_block_shift = shift; > + powerpc_caches.icache_blocks_per_page = (PAGE_SIZE >> shift); > +} > + > +/* > + * Preinitialize the powerpc_caches structure from the cputable. We will > + * later scan the device-tree for this information, which may be more > + * accurate. > + */ > +void __init initialize_early_cache_info(void) > +{ > + set_dcache_block_data(cur_cpu_spec->dcache_bsize); > + set_icache_block_data(cur_cpu_spec->icache_bsize); > +} > + > +/* > + * Initialize the powerpc_caches structure from the device-tree for use by > + * copy_page(), cache flush routines, and AT_DCACHEBSIZE elf headers. > + * > + * In the unlikely event that the device-tree doesn't have this information, > + * the defaults loaded by initialize_early_cache_info() from the cputable > + * will be used. 
> + */ > +void __init initialize_cache_info(void) > +{ > + /* Assume that the cache properties are the same across all nodes */ > + struct device_node *np = of_find_node_by_type(NULL, "cpu"); > + u32 value = 0; > + > + /* First check data/instruction cache block sizes */ > + if ( !of_property_read_u32(np, "d-cache-block-size", &value) || > + !of_property_read_u32(np, "d-cache-line-size", &value)) > + set_dcache_block_data(value); > + > + if ( !of_property_read_u32(np, "i-cache-block-size", &value) || > + !of_property_read_u32(np, "i-cache-line-size", &value)) > + set_icache_block_data(value); > + > + /* Also read total cache sizes (no defaults here) */ > + of_property_read_u32(np, "d-cache-size", > &powerpc_caches.dcache_total_bytes); > + of_property_read_u32(np, "i-cache-size", > &powerpc_caches.icache_total_bytes); > +} > + > void __init check_for_initrd(void) > { > #ifdef CONFIG_BLK_DEV_INITRD > diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h > index 4c67ad7..1ae16ec 100644 > --- a/arch/powerpc/kernel/setup.h > +++ b/arch/powerpc/kernel/setup.h > @@ -1,6 +1,7 @@ > #ifndef _POWERPC_KERNEL_SETUP_H > #define _POWERPC_KERNEL_SETUP_H > > +void initialize_cache_info(void); > void check_for_initrd(void); > void do_init_bootmem(void); > void setup_panic(void); > diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c > index c1ce863..1db2bfb 100644 > --- a/arch/powerpc/kernel/setup_32.c > +++ b/arch/powerpc/kernel/setup_32.c > @@ -63,14 +63,6 @@ EXPORT_SYMBOL(vgacon_remap_base); > #endif > > /* > - * These are used in binfmt_elf.c to put aux entries on the stack > - * for each elf executable being started. > - */ > -int dcache_bsize; > -int icache_bsize; > -int ucache_bsize; > - > -/* > * We're called here very early in the boot. We determine the machine > * type and call the appropriate low-level setup functions. 
> * -- Cort <c...@fsmlabs.com> > @@ -286,10 +278,13 @@ void __init setup_arch(char **cmdline_p) > { > *cmdline_p = cmd_line; > > + initialize_early_cache_info(); > + > /* so udelay does something sensible, assume <= 1000 bogomips */ > loops_per_jiffy = 500000000 / HZ; > > unflatten_device_tree(); > + initialize_cache_info(); > check_for_initrd(); > > if (ppc_md.init_early) > diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c > index 1a9dea8..bb686de 100644 > --- a/arch/powerpc/kernel/setup_64.c > +++ b/arch/powerpc/kernel/setup_64.c > @@ -77,25 +77,6 @@ int boot_cpuid = 0; > int __initdata spinning_secondaries; > u64 ppc64_pft_size; > > -/* Pick defaults since we might want to patch instructions > - * before we've read this from the device tree. > - */ > -struct ppc64_caches ppc64_caches = { > - .dline_size = 0x40, > - .log_dline_size = 6, > - .iline_size = 0x40, > - .log_iline_size = 6 > -}; > -EXPORT_SYMBOL_GPL(ppc64_caches); > - > -/* > - * These are used in binfmt_elf.c to put aux entries on the stack > - * for each elf executable being started. > - */ > -int dcache_bsize; > -int icache_bsize; > -int ucache_bsize; > - > #ifdef CONFIG_SMP > > static char *smt_enabled_cmdline; > @@ -265,82 +246,6 @@ void smp_release_cpus(void) > #endif /* CONFIG_SMP || CONFIG_KEXEC */ > > /* > - * Initialize some remaining members of the ppc64_caches and systemcfg > - * structures > - * (at least until we get rid of them completely). This is mostly some > - * cache informations about the CPU that will be used by cache flush > - * routines and/or provided to userland > - */ > -static void __init initialize_cache_info(void) > -{ > - struct device_node *np; > - unsigned long num_cpus = 0; > - > - DBG(" -> initialize_cache_info()\n"); > - > - for_each_node_by_type(np, "cpu") { > - num_cpus += 1; > - > - /* > - * We're assuming *all* of the CPUs have the same > - * d-cache and i-cache sizes... 
-Peter > - */ > - if (num_cpus == 1) { > - const u32 *sizep, *lsizep; > - u32 size, lsize; > - > - size = 0; > - lsize = cur_cpu_spec->dcache_bsize; > - sizep = of_get_property(np, "d-cache-size", NULL); > - if (sizep != NULL) > - size = *sizep; > - lsizep = of_get_property(np, "d-cache-block-size", > - NULL); > - /* fallback if block size missing */ > - if (lsizep == NULL) > - lsizep = of_get_property(np, > - "d-cache-line-size", > - NULL); > - if (lsizep != NULL) > - lsize = *lsizep; > - if (sizep == 0 || lsizep == 0) > - DBG("Argh, can't find dcache properties ! " > - "sizep: %p, lsizep: %p\n", sizep, lsizep); > - > - ppc64_caches.dsize = size; > - ppc64_caches.dline_size = lsize; > - ppc64_caches.log_dline_size = __ilog2(lsize); > - ppc64_caches.dlines_per_page = PAGE_SIZE / lsize; > - > - size = 0; > - lsize = cur_cpu_spec->icache_bsize; > - sizep = of_get_property(np, "i-cache-size", NULL); > - if (sizep != NULL) > - size = *sizep; > - lsizep = of_get_property(np, "i-cache-block-size", > - NULL); > - if (lsizep == NULL) > - lsizep = of_get_property(np, > - "i-cache-line-size", > - NULL); > - if (lsizep != NULL) > - lsize = *lsizep; > - if (sizep == 0 || lsizep == 0) > - DBG("Argh, can't find icache properties ! " > - "sizep: %p, lsizep: %p\n", sizep, lsizep); > - > - ppc64_caches.isize = size; > - ppc64_caches.iline_size = lsize; > - ppc64_caches.log_iline_size = __ilog2(lsize); > - ppc64_caches.ilines_per_page = PAGE_SIZE / lsize; > - } > - } > - > - DBG(" <- initialize_cache_info()\n"); > -} > - > - > -/* > * Do some initial setup of the system. The parameters are those which > * were passed in from the bootloader. > */ > @@ -365,10 +270,7 @@ void __init setup_system(void) > */ > unflatten_device_tree(); > > - /* > - * Fill the ppc64_caches & systemcfg structures with informations > - * retrieved from the device-tree. 
> - */ > + /* Fill the powerpc_caches structure with device-tree data */ > initialize_cache_info(); > > #ifdef CONFIG_PPC_RTAS > @@ -423,12 +325,10 @@ void __init setup_system(void) > printk("-----------------------------------------------------\n"); > printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); > printk("physicalMemorySize = 0x%llx\n", > memblock_phys_mem_size()); > - if (ppc64_caches.dline_size != 0x80) > - printk("ppc64_caches.dcache_line_size = 0x%x\n", > - ppc64_caches.dline_size); > - if (ppc64_caches.iline_size != 0x80) > - printk("ppc64_caches.icache_line_size = 0x%x\n", > - ppc64_caches.iline_size); > + if (powerpc_caches.dcache_block_bytes != 0x80) > + printk("dcache_block_bytes = 0x%x\n", > powerpc_caches.dcache_block_bytes); > + if (powerpc_caches.icache_block_bytes != 0x80) > + printk("icache_block_bytes = 0x%x\n", > powerpc_caches.icache_block_bytes); > #ifdef CONFIG_PPC_STD_MMU_64 > if (htab_address) > printk("htab_address = 0x%p\n", htab_address); > @@ -545,13 +445,7 @@ void __init setup_arch(char **cmdline_p) > > *cmdline_p = cmd_line; > > - /* > - * Set cache line size based on type of cpu as a default. > - * Systems with OF can look in the properties on the cpu node(s) > - * for a possibly more accurate value. 
> - */ > - dcache_bsize = ppc64_caches.dline_size; > - icache_bsize = ppc64_caches.iline_size; > + initialize_early_cache_info(); > > /* reboot on panic */ > panic_timeout = 180; > diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c > index 7d14bb6..4a038fb 100644 > --- a/arch/powerpc/kernel/vdso.c > +++ b/arch/powerpc/kernel/vdso.c > @@ -726,6 +726,7 @@ static int __init vdso_init(void) > vdso_data->version.major = SYSTEMCFG_MAJOR; > vdso_data->version.minor = SYSTEMCFG_MINOR; > vdso_data->processor = mfspr(SPRN_PVR); > + > /* > * Fake the old platform number for pSeries and iSeries and add > * in LPAR bit if necessary > @@ -734,29 +735,25 @@ static int __init vdso_init(void) > if (firmware_has_feature(FW_FEATURE_LPAR)) > vdso_data->platform |= 1; > vdso_data->physicalMemorySize = memblock_phys_mem_size(); > - vdso_data->dcache_size = ppc64_caches.dsize; > - vdso_data->dcache_line_size = ppc64_caches.dline_size; > - vdso_data->icache_size = ppc64_caches.isize; > - vdso_data->icache_line_size = ppc64_caches.iline_size; > > - /* XXXOJN: Blocks should be added to ppc64_caches and used instead */ > - vdso_data->dcache_block_size = ppc64_caches.dline_size; > - vdso_data->icache_block_size = ppc64_caches.iline_size; > - vdso_data->dcache_log_block_size = ppc64_caches.log_dline_size; > - vdso_data->icache_log_block_size = ppc64_caches.log_iline_size; > + /* There are more cache parameters saved for 64-bit than 32-bit */ > + vdso_data->dcache_size = powerpc_caches.dcache_total_size; > + vdso_data->icache_size = powerpc_caches.icache_total_size; > + vdso_data->dcache_line_size = powerpc_caches.dcache_block_bytes; > + vdso_data->icache_line_size = powerpc_caches.icache_block_bytes; > > /* > * Calculate the size of the 64 bits vDSO > */ > vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT; > DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages); > -#else > - vdso_data->dcache_block_size = L1_CACHE_BYTES; > - 
vdso_data->dcache_log_block_size = L1_CACHE_SHIFT; > - vdso_data->icache_block_size = L1_CACHE_BYTES; > - vdso_data->icache_log_block_size = L1_CACHE_SHIFT; > -#endif /* CONFIG_PPC64 */ > +#endif > > + /* Save the cache-block sizes for the VDSO */ > + vdso_data->dcache_block_size = powerpc_caches.dcache_block_bytes; > + vdso_data->icache_block_size = powerpc_caches.icache_block_bytes; > + vdso_data->dcache_log_block_size = powerpc_caches.dcache_block_shift; > + vdso_data->icache_log_block_size = powerpc_caches.icache_block_shift; > > /* > * Calculate the size of the 32 bits vDSO > diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S > index 53dcb6b..c466977 100644 > --- a/arch/powerpc/lib/copypage_64.S > +++ b/arch/powerpc/lib/copypage_64.S > @@ -12,17 +12,17 @@ > #include <asm/asm-offsets.h> > > .section ".toc","aw" > -PPC64_CACHES: > - .tc ppc64_caches[TC],ppc64_caches > +POWERPC_CACHES: > + .tc powerpc_caches[TC],powerpc_caches > .section ".text" > > _GLOBAL(copy_page) > lis r5,PAGE_SIZE@h > ori r5,r5,PAGE_SIZE@l > BEGIN_FTR_SECTION > - ld r10,PPC64_CACHES@toc(r2) > - lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */ > - lwz r12,DCACHEL1LINESIZE(r10) /* get cache line size */ > + ld r10,POWERPC_CACHES@toc(r2) > + lwz r11,DCACHE_BLOCK_SHIFT(r10) /* log2 of cache line size */ > + lwz r12,DCACHE_BLOCK_BYTES(r10) /* get cache line size */ > li r9,0 > srd r8,r5,r11 > > diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile > index 991ee81..8ad36a9 100644 > --- a/arch/powerpc/mm/Makefile > +++ b/arch/powerpc/mm/Makefile > @@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror > > ccflags-$(CONFIG_PPC64) := -mno-minimal-toc > > -obj-y := fault.o mem.o pgtable.o gup.o \ > +obj-y := cache.o fault.o mem.o pgtable.o > gup.o \ > init_$(CONFIG_WORD_SIZE).o \ > pgtable_$(CONFIG_WORD_SIZE).o > obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ > diff --git a/arch/powerpc/mm/cache.c 
b/arch/powerpc/mm/cache.c > new file mode 100644 > index 0000000..0fbf2d6 > --- /dev/null > +++ b/arch/powerpc/mm/cache.c > @@ -0,0 +1,279 @@ > +#include <linux/kprobes.h> > +#include <linux/export.h> > +#include <linux/types.h> > + > +#include <asm/cputable.h> > +#include <asm/system.h> > +#include <asm/cache.h> > +#include <asm/page.h> > +#include <asm/mmu.h> > + > +/* > + * Write any modified data cache blocks out to memory. > + * Does not invalidate the corresponding cache lines (especially for > + * any corresponding instruction cache). > + */ > +void clean_dcache_range(unsigned long start, unsigned long stop) > +{ > + unsigned long addr; > + FOR_EACH_CACHELINE(addr, start, stop, dcache) > + dcbst(addr); > + mb(); > +} > + > +/* > + * Write any modified data cache blocks out to memory and invalidate them. > + * Does not invalidate the corresponding instruction cache blocks. > + */ > +void flush_dcache_range(unsigned long start, unsigned long stop) > +{ > + unsigned long addr; > + FOR_EACH_CACHELINE(addr, start, stop, dcache) > + dcbf(addr); > + mb(); > +} > +EXPORT_SYMBOL(flush_dcache_range); > + > +/* > + * Like above, but invalidate the D-cache. This is used by the 8xx > + * to invalidate the cache so the PPC core doesn't get stale data > + * from the CPM (no cache snooping here :-). > + * > + * invalidate_dcache_range(unsigned long start, unsigned long stop) > + */ > +void invalidate_dcache_range(unsigned long start, unsigned long stop) > +{ > + unsigned long addr; > + FOR_EACH_CACHELINE(addr, start, stop, dcache) > + dcbi(addr); > + mb(); > +} > + > +/* > + * Unfortunately, we cannot flush individual chunks of the icache on 44x as > + * we are passed kmapped addresses and we have a virtually-tagged icache. > + * > + * The only workaround is to invalidate the whole icache. > + * > + * NOTE: The CPU does not use the operands for this instruction, so > + * they are passed as dummies. 
> + */ > +__kprobes void __flush_icache_range(unsigned long start, unsigned long stop) > +{ > + unsigned long addr; > + > + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) > + return; > + > + /* First ensure that data has been written to memory */ > + FOR_EACH_CACHELINE(addr, start, stop, dcache) > + dcbst(addr); > + mb(); > + > +#ifdef CONFIG_44x > + if (mmu_has_feature(MMU_FTR_TYPE_44x)) { > + asm volatile("iccci 0, r0" ::: "memory"); > + return; > + } > +#endif > + > + /* Now discard the corresponding icache */ > + FOR_EACH_CACHELINE(addr, start, stop, icache) > + icbi(addr); > + mb(); > + isync(); > +} > +EXPORT_SYMBOL(__flush_icache_range); > + > +/* > + * Flush a particular page from the data cache to RAM. > + * Note: this is necessary because the instruction cache does *not* > + * snoop from the data cache. > + * This is a no-op on the 601 which has a unified cache. > + * > + * void __flush_dcache_icache(void *page) > + */ > +void __flush_dcache_icache(void *page) > +{ > + unsigned long base = ((unsigned long)page) & ~(PAGE_SIZE-1); > + unsigned long addr; > + > + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) > + return; > + > + /* First ensure that data has been written to memory */ > + FOR_EACH_CACHELINE(addr, base, base + PAGE_SIZE, dcache) > + dcbst(addr); > + > +#ifdef CONFIG_44x > + /* > + * We don't flush the icache on 44x. Those have a virtual icache and > + * we don't have access to the virtual address here (it's not the > + * page vaddr but where it's mapped in user space). The flushing of > + * the icache on these is handled elsewhere, when a change in the > + * address space occurs, before returning to user space. 
> + */ > + if (mmu_has_feature(MMU_FTR_TYPE_44x)) > + return; > +#endif > + > + FOR_EACH_CACHELINE(addr, base, base + PAGE_SIZE, icache) > + icbi(addr); > + > + mb(); > + isync(); > +} > + > +/* > + * Clear pages using the dcbz instruction, which doesn't cause any > + * memory traffic (except to write out any cache lines which get > + * displaced). This only works on cacheable memory. > + * > + */ > +void clear_pages(void *page, int order) > +{ > + unsigned long addr, base = (unsigned long)page; > + FOR_EACH_CACHELINE(addr, base, base + (PAGE_SIZE << order), dcache) > + dcbz(addr); > +} > +EXPORT_SYMBOL(clear_pages); > + > +#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE) > +/* > + * Flush a particular page from the data cache to RAM, identified > + * by its physical address. We turn off the MMU so we can just use > + * the physical address (this may be a highmem page without a kernel > + * mapping). > + */ > +void __flush_dcache_icache_phys(unsigned long phys_page) > +{ > + u32 d_size = powerpc_caches.dcache_block_bytes; > + u32 i_size = powerpc_caches.icache_block_bytes; > + u32 d_per_page = powerpc_caches.dcache_blocks_per_page; > + u32 i_per_page = powerpc_caches.icache_blocks_per_page; > + > + /* Temporary registers for the ASM to use */ > + unsigned long old_msr, tmp_msr, d_phys_page, i_phys_page; > + > + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) > + return; > + > + /* Page base address (used in 2 different loops) */ > + d_phys_page = i_phys_page = phys_page & ~(PAGE_SIZE - 1); > + > + /* > + * This part needs to be 100% ASM because we disable the MMU, and we > + * can't accidentally let some C code go poking at memory while the > + * MMU isn't enabled. > + * > + * NOTE: This looks blatantly unsafe with respect to interrupts. > + * Hopefully all the callers provide sufficient protection? 
> + */ > + asm volatile( > + /* First disable the MMU */ > + "mfmsr %[old_msr]\n\t" > + "rlwinm %[tmp_msr], %[old_msr], 0, 28, 26\n\t" > + "mtmsr %[tmp_msr]\n\t" > + "isync\n\t" > + > + /* Clean the data cache */ > + "mtctr %[d_per_page]\n" > + "0: dcbst 0, %[d_phys_page]\n\t" > + "add %[d_phys_page], %[d_phys_page], %[d_size]\n\t" > + "bdnz 0b\n\t" > + "sync\n\t" > + > + /* Invalidate the instruction cache */ > + "mtctr %[i_per_page]\n" > + "0: icbi 0, %[i_phys_page]\n\t" > + "add %[i_phys_page], %[i_phys_page], %[i_size]\n\t" > + "bdnz 0b\n\t" > + > + /* Finally, re-enable the MMU */ > + "sync\n\t" > + "mtmsr %[old_msr]\n\t" > + "isync\n\t" > + > + /* Temporary variables and inputs */ > + : [old_msr] "=&r" (old_msr), > + [tmp_msr] "=&r" (tmp_msr), > + [d_phys_page] "=b" (d_phys_page), > + [i_phys_page] "=b" (i_phys_page) > + > + /* Inputs */ > + : [d_size] "b" (d_size), > + [i_size] "b" (i_size), > + [d_per_page] "b" (d_per_page), > + [i_per_page] "b" (i_per_page), > + "[d_phys_page]" (d_phys_page), > + "[i_phys_page]" (i_phys_page) > + > + /* Clobbers */ > + : "memory", "c" > + ); > +} > +#endif /* CONFIG_PPC32 && !CONFIG_BOOKE */ > + > +#ifdef CONFIG_PPC64 > +/* > + * Data cache flush that works on non-mapped physical addresses. > + * Use only for non-LPAR setups ! It also assumes real mode > + * is cacheable. 
Used for flushing out the DART before using > + * it as uncacheable memory > + */ > +void flush_dcache_phys_range(unsigned long start, unsigned long stop) > +{ > + /* System data cache block size */ > + unsigned long bytes = powerpc_caches.dcache_block_bytes; > + unsigned long shift = powerpc_caches.dcache_block_shift; > + > + /* Temporary registers for the ASM to use */ > + unsigned long old_msr, tmp_msr; > + > + /* Compute a start address and number of cachelines */ > + unsigned long phys_addr = start & ~(bytes - 1); > + unsigned long nr_lines = ((stop - phys_addr) + (bytes - 1)) >> shift; > + > + /* > + * This part needs to be 100% ASM because we disable the MMU, and we > + * can't accidentally let some C code go poking at memory while the > + * MMU isn't enabled. > + * > + * NOTE: This looks blatantly unsafe with respect to interrupts. > + * Hopefully all the callers provide sufficient protection? > + */ > + asm volatile( > + /* First disable the MMU */ > + "mfmsr %[old_msr]\n\t" > + "rlwinm %[tmp_msr], %[old_msr], 0, 28, 26\n\t" > + "mtmsr %[tmp_msr]\n\t" > + "isync\n\t" > + > + /* Clean the data cache */ > + "mtctr %[nr_lines]\n" > + "0: dcbst 0, %[phys_addr]\n\t" > + "add %[phys_addr], %[phys_addr], %[bytes]\n\t" > + "bdnz 0b\n\t" > + "sync\n\t" > + "isync\n\t" > + > + /* Finally, re-enable the MMU */ > + "mtmsr %[old_msr]\n\t" > + "sync\n\t" > + "isync\n\t" > + > + /* Temporary variables and inputs */ > + : [old_msr] "=&r" (old_msr), > + [tmp_msr] "=&r" (tmp_msr), > + [phys_addr] "=b" (phys_addr) > + > + /* Inputs */ > + : [bytes] "b" (bytes), > + [nr_lines] "b" (nr_lines), > + "[phys_addr]" (phys_addr) > + > + /* Clobbers */ > + : "memory", "c" > + ); > +} > +#endif /* CONFIG_PPC64 */ > diff --git a/arch/powerpc/mm/dma-noncoherent.c > b/arch/powerpc/mm/dma-noncoherent.c > index 329be36..3823f64 100644 > --- a/arch/powerpc/mm/dma-noncoherent.c > +++ b/arch/powerpc/mm/dma-noncoherent.c > @@ -328,7 +328,7 @@ void __dma_sync(void *vaddr, size_t size, int 
direction) > * invalidate only when cache-line aligned otherwise there is > * the potential for discarding uncommitted data from the cache > */ > - if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - > 1))) > + if ((start | size) & (powerpc_caches.dcache_block_bytes - 1)) > flush_dcache_range(start, end); > else > invalidate_dcache_range(start, end); > diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S > b/arch/powerpc/platforms/52xx/lite5200_sleep.S > index 08ab6fe..ac285d9 100644 > --- a/arch/powerpc/platforms/52xx/lite5200_sleep.S > +++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S > @@ -394,11 +394,16 @@ restore_regs: > > > /* cache flushing code. copied from arch/ppc/boot/util.S */ > -#define NUM_CACHE_LINES (128*8) > +#define NUM_CACHE_LINES ((128 * 8) << (L1_CACHE_SHIFT_MAX - > L1_CACHE_SHIFT_MIN)) > > /* > * Flush data cache > * Do this by just reading lots of stuff into the cache. > + * > + * NOTE: This does not handle variable-sized cachelines properly, but since > + * we are just trying to flush the data cache by reading lots of data, > + * this works anyway. We just make sure we read as many cachelines > + * as we could possibly need to overflow the cache on any hardware. 
> */ > flush_data_cache: > lis r3,CONFIG_KERNEL_START@h > @@ -407,6 +412,6 @@ flush_data_cache: > mtctr r4 > 1: > lwz r4,0(r3) > - addi r3,r3,L1_CACHE_BYTES /* Next line, please */ > + addi r3,r3,L1_CACHE_BYTES_MIN /* Next line, please */ > bdnz 1b > blr > diff --git a/arch/powerpc/platforms/powermac/pci.c > b/arch/powerpc/platforms/powermac/pci.c > index 31a7d3a..8503e38 100644 > --- a/arch/powerpc/platforms/powermac/pci.c > +++ b/arch/powerpc/platforms/powermac/pci.c > @@ -1135,7 +1135,7 @@ int pmac_pci_enable_device_hook(struct pci_dev *dev) > pci_write_config_byte(dev, PCI_LATENCY_TIMER, 16); > > pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, > - L1_CACHE_BYTES >> 2); > + powerpc_caches.dcache_block_bytes >> 2); > } > > return 0; > diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c > index 03a217a..c537d49 100644 > --- a/arch/powerpc/xmon/xmon.c > +++ b/arch/powerpc/xmon/xmon.c > @@ -26,6 +26,7 @@ > > #include <asm/ptrace.h> > #include <asm/string.h> > +#include <asm/cache.h> > #include <asm/prom.h> > #include <asm/machdep.h> > #include <asm/xmon.h> > @@ -254,16 +255,6 @@ static inline void store_inst(void *p) > asm volatile ("dcbst 0,%0; sync; icbi 0,%0; isync" : : "r" (p)); > } > > -static inline void cflush(void *p) > -{ > - asm volatile ("dcbf 0,%0; icbi 0,%0" : : "r" (p)); > -} > - > -static inline void cinval(void *p) > -{ > - asm volatile ("dcbi 0,%0; icbi 0,%0" : : "r" (p)); > -} > - > /* > * Disable surveillance (the service processor watchdog function) > * while we are in xmon. 
> @@ -1513,10 +1504,9 @@ static void prregs(struct pt_regs *fp) > > static void cacheflush(void) > { > - int cmd; > - unsigned long nflush; > + unsigned long nflush, i; > > - cmd = inchar(); > + int cmd = inchar(); > if (cmd != 'i') > termch = cmd; > scanhex((void *)&adrs); > @@ -1524,23 +1514,30 @@ static void cacheflush(void) > termch = 0; > nflush = 1; > scanhex(&nflush); > - nflush = (nflush + L1_CACHE_BYTES - 1) / L1_CACHE_BYTES; > - if (setjmp(bus_error_jmp) == 0) { > - catch_memory_errors = 1; > - sync(); > > - if (cmd != 'i') { > - for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES) > - cflush((void *) adrs); > - } else { > - for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES) > - cinval((void *) adrs); > - } > - sync(); > - /* wait a little while to see if we get a machine check */ > - __delay(200); > + if (setjmp(bus_error_jmp) != 0) { > + catch_memory_errors = 0; > + return; > } > - catch_memory_errors = 0; > + catch_memory_errors = 1; > + sync(); > + > + /* First flush/invalidate data caches */ > + if (cmd != 'i') { > + FOR_EACH_CACHELINE(i, adrs, adrs + nflush, dcache) > + dcbf(i); > + } else { > + FOR_EACH_CACHELINE(i, adrs, adrs + nflush, dcache) > + dcbi(i); > + } > + > + /* Now invalidate instruction caches */ > + FOR_EACH_CACHELINE(i, adrs, adrs + nflush, icache) > + icbi(i); > + > + sync(); > + /* wait a little while to see if we get a machine check */ > + __delay(200); > } > > static unsigned long > diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c > index 116a49c..04ead15 100644 > --- a/drivers/macintosh/smu.c > +++ b/drivers/macintosh/smu.c > @@ -136,7 +136,9 @@ static void smu_start_cmd(void) > /* Flush command and data to RAM */ > faddr = (unsigned long)smu->cmd_buf; > fend = faddr + smu->cmd_buf->length + 2; > - flush_inval_dcache_range(faddr, fend); > + flush_dcache_range(faddr, fend); > + mb(); > + isync(); > > > /* We also disable NAP mode for the duration of the command > @@ -198,7 +200,9 @@ static irqreturn_t 
smu_db_intr(int irq, void *arg) > * reply length (it's only 2 cache lines anyway) > */ > faddr = (unsigned long)smu->cmd_buf; > - flush_inval_dcache_range(faddr, faddr + 256); > + flush_dcache_range(faddr, faddr + 256); > + mb(); > + isync(); > > /* Now check ack */ > ack = (~cmd->cmd) & 0xff; _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev