[RFC PATCH 09/10] POWERPC: smp: remove call to ipi_call_lock()/ipi_call_unlock()
From: Yong Zhang 1) call_function.lock used in smp_call_function_many() is just to protect call_function.queue and &data->refs, cpu_online_mask is outside of the lock. And it's not necessary to protect cpu_online_mask, because data->cpumask is pre-calculated and even if a cpu is brought up when calling arch_send_call_function_ipi_mask(), it's harmless because validation test in generic_smp_call_function_interrupt() will take care of it. 2) For cpu down issue, stop_machine() will guarantee that no concurrent smp_call_function() is processing. Signed-off-by: Yong Zhang Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org --- arch/powerpc/kernel/smp.c |2 -- 1 files changed, 0 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index e4cb343..e1417c4 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -571,7 +571,6 @@ void __devinit start_secondary(void *unused) if (system_state == SYSTEM_RUNNING) vdso_data->processorCount++; #endif - ipi_call_lock(); notify_cpu_starting(cpu); set_cpu_online(cpu, true); /* Update sibling maps */ @@ -601,7 +600,6 @@ void __devinit start_secondary(void *unused) of_node_put(np); } of_node_put(l2_cache); - ipi_call_unlock(); local_irq_enable(); -- 1.7.5.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v5 1/5] powerpc/85xx: implement hardware timebase sync
Hi Scott, Thanks for the valuable comment raised before and we have updated the patches accordingly. Please review the updated patch set and ACK if they are good to you. We hope it can be applied in this window. Leo On Fri, May 11, 2012 at 7:53 PM, Zhao Chenhui wrote: > Do hardware timebase sync. Firstly, stop all timebases, and transfer > the timebase value of the boot core to the other core. Finally, > start all timebases. > > Only apply to dual-core chips, such as MPC8572, P2020, etc. > > Signed-off-by: Zhao Chenhui > Signed-off-by: Li Yang > --- > arch/powerpc/include/asm/fsl_guts.h | 2 + > arch/powerpc/platforms/85xx/smp.c | 93 > +-- > 2 files changed, 91 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/include/asm/fsl_guts.h > b/arch/powerpc/include/asm/fsl_guts.h > index aa4c488..dd5ba2c 100644 > --- a/arch/powerpc/include/asm/fsl_guts.h > +++ b/arch/powerpc/include/asm/fsl_guts.h > @@ -48,6 +48,8 @@ struct ccsr_guts { > __be32 dmuxcr; /* 0x.0068 - DMA Mux Control Register > */ > u8 res06c[0x70 - 0x6c]; > __be32 devdisr; /* 0x.0070 - Device Disable Control */ > +#define CCSR_GUTS_DEVDISR_TB1 0x1000 > +#define CCSR_GUTS_DEVDISR_TB0 0x4000 > __be32 devdisr2; /* 0x.0074 - Device Disable Control 2 */ > u8 res078[0x7c - 0x78]; > __be32 pmjcr; /* 0x.007c - 4 Power Management Jog Control > Register */ > diff --git a/arch/powerpc/platforms/85xx/smp.c > b/arch/powerpc/platforms/85xx/smp.c > index ff42490..6862dda 100644 > --- a/arch/powerpc/platforms/85xx/smp.c > +++ b/arch/powerpc/platforms/85xx/smp.c > @@ -24,6 +24,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -115,13 +116,70 @@ smp_85xx_kick_cpu(int nr) > > struct smp_ops_t smp_85xx_ops = { > .kick_cpu = smp_85xx_kick_cpu, > -#ifdef CONFIG_KEXEC > - .give_timebase = smp_generic_give_timebase, > - .take_timebase = smp_generic_take_timebase, > -#endif > }; > > #ifdef CONFIG_KEXEC > +static struct ccsr_guts __iomem *guts; > +static u64 timebase; > +static int tb_req; > 
+static int tb_valid; > + > +static void mpc85xx_timebase_freeze(int freeze) > +{ > + unsigned int mask; > + > + if (!guts) > + return; > + > + mask = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1; > + if (freeze) > + setbits32(&guts->devdisr, mask); > + else > + clrbits32(&guts->devdisr, mask); > + > + in_be32(&guts->devdisr); > +} > + > +static void mpc85xx_give_timebase(void) > +{ > + unsigned long flags; > + > + local_irq_save(flags); > + > + while (!tb_req) > + barrier(); > + tb_req = 0; > + > + mpc85xx_timebase_freeze(1); > + timebase = get_tb(); > + mb(); > + tb_valid = 1; > + > + while (tb_valid) > + barrier(); > + > + mpc85xx_timebase_freeze(0); > + > + local_irq_restore(flags); > +} > + > +static void mpc85xx_take_timebase(void) > +{ > + unsigned long flags; > + > + local_irq_save(flags); > + > + tb_req = 1; > + while (!tb_valid) > + barrier(); > + > + set_tb(timebase >> 32, timebase & 0xffffffff); > + mb(); > + tb_valid = 0; > + > + local_irq_restore(flags); > +} > + > atomic_t kexec_down_cpus = ATOMIC_INIT(0); > > void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) > @@ -228,6 +286,20 @@ smp_85xx_setup_cpu(int cpu_nr) > doorbell_setup_this_cpu(); > } > > +#ifdef CONFIG_KEXEC > +static const struct of_device_id guts_ids[] = { > + { .compatible = "fsl,mpc8572-guts", }, > + { .compatible = "fsl,mpc8560-guts", }, > + { .compatible = "fsl,mpc8536-guts", }, > + { .compatible = "fsl,p1020-guts", }, > + { .compatible = "fsl,p1021-guts", }, > + { .compatible = "fsl,p1022-guts", }, > + { .compatible = "fsl,p1023-guts", }, > + { .compatible = "fsl,p2020-guts", }, > + {}, > +}; > +#endif > + > void __init mpc85xx_smp_init(void) > { > struct device_node *np; > @@ -249,6 +321,19 @@ void __init mpc85xx_smp_init(void) > smp_85xx_ops.cause_ipi = doorbell_cause_ipi; > } > > +#ifdef CONFIG_KEXEC > + np = of_find_matching_node(NULL, guts_ids); > + if (np) { > + guts = of_iomap(np, 0); > + smp_85xx_ops.give_timebase = mpc85xx_give_timebase; > + 
smp_85xx_ops.take_timebase = mpc85xx_take_timebase; > + of_node_put(np); > + } else { > + smp_85xx_ops.give_timebase = smp_generic_give_timebase; > + smp_85xx_ops.take_timebase = smp_generic_take_timebase; > + } > +#endif > + > smp_ops = &smp_85xx_ops; > > #ifdef CONFIG_KEXEC > -- > 1.6.4.1 > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kerne
[PATCH] powerpc: Use enhanced touch instructions in POWER7 copy_to_user/copy_from_user
Version 2.06 of the POWER ISA introduced enhanced touch instructions, allowing us to specify a number of attributes including the length of a stream. This patch adds a software stream for both loads and stores in the POWER7 copy_tofrom_user loop. Since the setup is quite complicated and we have to use an eieio to ensure correct ordering of the "GO" command we only do this for copies above 4kB. To quantify any performance improvements we need a working set bigger than the caches so we operate on a 1GB file: # dd if=/dev/zero of=/tmp/foo bs=1M count=1024 And we compare how fast we can read the file: # dd if=/tmp/foo of=/dev/null bs=1M before: 7.7 GB/s after: 9.6 GB/s A 25% improvement. The worst case for this patch will be a completely L1 cache contained copy of just over 4kB. We can test this with the copy_to_user testcase we used to tune copy_tofrom_user originally: http://ozlabs.org/~anton/junkcode/copy_to_user.c # time ./copy_to_user2 -l 4224 -i 1000 before: 6.807 s after: 6.946 s A 2% slowdown, which seems reasonable considering our data is unlikely to be completely L1 contained. Signed-off-by: Anton Blanchard --- Index: linux-build/arch/powerpc/lib/copyuser_power7.S === --- linux-build.orig/arch/powerpc/lib/copyuser_power7.S 2012-03-01 16:15:31.073813523 +1100 +++ linux-build/arch/powerpc/lib/copyuser_power7.S 2012-05-29 16:14:44.129704887 +1000 @@ -298,6 +298,37 @@ err1; stb r0,0(r3) ld r5,STACKFRAMESIZE+64(r1) mtlrr0 + /* +* We prefetch both the source and destination using enhanced touch +* instructions. We use a stream ID of 0 for the load side and +* 1 for the store side. 
+*/ + clrrdi r6,r4,7 + clrrdi r9,r3,7 + ori r9,r9,1 /* stream=1 */ + + srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */ + cmpldi r7,0x3FF + ble 1f + li r7,0x3FF +1: lis r0,0x0E00 /* depth=7 */ + sldi r7,r7,7 + or r7,r7,r0 + ori r10,r7,1 /* stream=1 */ + + lis r8,0x8000 /* GO=1 */ + clrldi r8,r8,32 + +.machine push +.machine "power4" + dcbt r0,r6,0b01000 + dcbt r0,r7,0b01010 + dcbtst r0,r9,0b01000 + dcbtst r0,r10,0b01010 + eieio + dcbt r0,r8,0b01010 /* GO */ +.machine pop + beq .Lunwind_stack_nonvmx_copy /* ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: pread() and pwrite() system calls
> > A special pread/pwrite asm stub that just copies > > r7 to r0 could be used. > > > > Would it be enough to do: > > syscall_pread_pwrite: > > mov 0,7 > > sc > > blr > > and handle the -ve -> errno in C? > > Huh? Won't fly, r0 is used for the system call number! I was copying that from r7! Actually I have a much better stub by copying the one used for mmap(). The system call itself is fine. Using the system call almost halved the time taken for a 4-byte read. > On the other hand, I believed PPC had no problems passing > up to 8 32 bit arguments in registers (r3 to r10), but > I may be confusing with the standard ABI for function calls. > > Hmm, a quick look at kernel/entry_32.s shows that it should > be able to use at least r3 to r8, which should be sufficient. > > I think that it is an uClibc problem. True, in that it isn't a kernel bug. OTOH the kernel is likely to get blamed for non-atomic pread. I've found the same user-space code in newlib as well. glibc may be ok, some code I've found implies it only uses the 'emulation' when the system call returns ENOSYS. David ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: pread() and pwrite() system calls
On Tue, 2012-05-29 at 09:28 +0100, David Laight wrote: > > > A special pread/pwrite asm stub that just copies > > > r7 to r0 could be used. > > > > > > Would it be enough to do: > > > syscall_pread_pwrite: > > > mov 0,7 > > > sc > > > blr > > > and handle the -ve -> errno in C? > > > > Huh? Won't fly, r0 is used for the system call number! > > I was copying that from r7! Using the non-existent 'mov' instruction! or 0,7,7 is much clearer :) cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc: Clear RI and EE at the same time in system call exit
In system call exit we currently clear RI and EE separately. An mtmsrd is a slow operation and we can save cycles by doing it all in one go. This does complicate things a bit - we have to be careful to restore RI if we branch out before returning to userspace. On a POWER7 with virtual cputime disabled this patch improves the null system call by 7%. Signed-off-by: Anton Blanchard --- Index: linux-build/arch/powerpc/kernel/entry_64.S === --- linux-build.orig/arch/powerpc/kernel/entry_64.S 2012-05-28 18:23:33.374451416 +1000 +++ linux-build/arch/powerpc/kernel/entry_64.S 2012-05-29 21:18:22.280934940 +1000 @@ -197,7 +197,16 @@ syscall_exit: wrteei 0 #else ld r10,PACAKMSR(r13) - mtmsrd r10,1 + /* +* For performance reasons we clear RI the same time that we +* clear EE. We only need to clear RI just before we restore r13 +* below, but batching it with EE saves us one expensive mtmsrd call. +* We have to be careful to restore RI if we branch anywhere from +* here (eg syscall_exit_work). +*/ + li r9,MSR_RI + andcr11,r10,r9 + mtmsrd r11,1 #endif /* CONFIG_PPC_BOOK3E */ ld r9,TI_FLAGS(r12) @@ -214,17 +223,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. r6,r8,MSR_PR ld r4,_LINK(r1) - /* -* Clear RI before restoring r13. If we are returning to -* userspace and we take an exception after restoring r13, -* we end up corrupting the userspace r13 value. -*/ -#ifdef CONFIG_PPC_BOOK3S - /* No MSR:RI on BookE */ - li r12,MSR_RI - andcr11,r10,r12 - mtmsrd r11,1 /* clear MSR.RI */ -#endif /* CONFIG_PPC_BOOK3S */ beq-1f ACCOUNT_CPU_USER_EXIT(r11, r12) @@ -271,6 +269,7 @@ syscall_enosys: b syscall_exit syscall_exit_work: + mtmsrd r10,1 /* Restore RI */ /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. If TIF_NOERROR is set, just save r3 as it is. */ ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
I cannot make simple breakpoints work using the BDI2000 in 3.3; Abatron suggests that this is because MSR[DE] is cleared by the kernel. With the emulator I can see that MSR[DE] is off quite often, just by stopping at random times and looking at the MSR, so it seems like the kernel is turning MSR[DE] off most of the time. Anyone else having success debugging 3.3 with BDI2000? This is on a P2010(E500/BOOKE) CPU. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [linuxppc-release] [PATCH v5 1/5] powerpc/85xx: implement hardware timebase sync
Hi Kumar, There is no comment for these patches so far. Do you think these patches can be merged? We really want these patches to be merged in this merge window. Thanks. Best Regards, Chenhui > -Original Message- > From: Zhao Chenhui-B35336 > Sent: Friday, May 25, 2012 3:09 PM > To: Wood Scott-B07421; ga...@kernel.crashing.org > Cc: Li Yang-R58472 > Subject: RE: [linuxppc-release] [PATCH v5 1/5] powerpc/85xx: implement > hardware timebase sync > > Hi Scott and Kumar, > > Do you have comments for these patches? > > http://patchwork.ozlabs.org/patch/158484/ > http://patchwork.ozlabs.org/patch/158485/ > http://patchwork.ozlabs.org/patch/158487/ > http://patchwork.ozlabs.org/patch/158486/ > http://patchwork.ozlabs.org/patch/158488/ > > Thanks. > > Best Regards, > Chenhui > > > -Original Message- > > From: linuxppc-release-boun...@linux.freescale.net [mailto:linuxppc-release- > > boun...@linux.freescale.net] On Behalf Of Zhao Chenhui-B35336 > > Sent: Friday, May 11, 2012 7:54 PM > > To: linuxppc-dev@lists.ozlabs.org > > Cc: Wood Scott-B07421; Li Yang-R58472; linux-ker...@vger.kernel.org; > > ga...@kernel.crashing.org > > Subject: [linuxppc-release] [PATCH v5 1/5] powerpc/85xx: implement hardware > > timebase sync > > > > Do hardware timebase sync. Firstly, stop all timebases, and transfer > > the timebase value of the boot core to the other core. Finally, > > start all timebases. > > > > Only apply to dual-core chips, such as MPC8572, P2020, etc. 
> > > > Signed-off-by: Zhao Chenhui > > Signed-off-by: Li Yang > > --- > > arch/powerpc/include/asm/fsl_guts.h |2 + > > arch/powerpc/platforms/85xx/smp.c | 93 > > +-- > > 2 files changed, 91 insertions(+), 4 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/fsl_guts.h > > b/arch/powerpc/include/asm/fsl_guts.h > > index aa4c488..dd5ba2c 100644 > > --- a/arch/powerpc/include/asm/fsl_guts.h > > +++ b/arch/powerpc/include/asm/fsl_guts.h > > @@ -48,6 +48,8 @@ struct ccsr_guts { > > __be32 dmuxcr;/* 0x.0068 - DMA Mux Control Register */ > > u8 res06c[0x70 - 0x6c]; > > __be32 devdisr;/* 0x.0070 - Device Disable Control */ > > +#define CCSR_GUTS_DEVDISR_TB1 0x1000 > > +#define CCSR_GUTS_DEVDISR_TB0 0x4000 > > __be32 devdisr2; /* 0x.0074 - Device Disable Control 2 */ > > u8 res078[0x7c - 0x78]; > > __be32 pmjcr; /* 0x.007c - 4 Power Management Jog Control > > Register */ > > diff --git a/arch/powerpc/platforms/85xx/smp.c > > b/arch/powerpc/platforms/85xx/smp.c > > index ff42490..6862dda 100644 > > --- a/arch/powerpc/platforms/85xx/smp.c > > +++ b/arch/powerpc/platforms/85xx/smp.c > > @@ -24,6 +24,7 @@ > > #include > > #include > > #include > > +#include > > > > #include > > #include > > @@ -115,13 +116,70 @@ smp_85xx_kick_cpu(int nr) > > > > struct smp_ops_t smp_85xx_ops = { > > .kick_cpu = smp_85xx_kick_cpu, > > -#ifdef CONFIG_KEXEC > > - .give_timebase = smp_generic_give_timebase, > > - .take_timebase = smp_generic_take_timebase, > > -#endif > > }; > > > > #ifdef CONFIG_KEXEC > > +static struct ccsr_guts __iomem *guts; > > +static u64 timebase; > > +static int tb_req; > > +static int tb_valid; > > + > > +static void mpc85xx_timebase_freeze(int freeze) > > +{ > > + unsigned int mask; > > + > > + if (!guts) > > + return; > > + > > + mask = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1; > > + if (freeze) > > + setbits32(&guts->devdisr, mask); > > + else > > + clrbits32(&guts->devdisr, mask); > > + > > + in_be32(&guts->devdisr); > > +} > > + > > +static void 
mpc85xx_give_timebase(void) > > +{ > > + unsigned long flags; > > + > > + local_irq_save(flags); > > + > > + while (!tb_req) > > + barrier(); > > + tb_req = 0; > > + > > + mpc85xx_timebase_freeze(1); > > + timebase = get_tb(); > > + mb(); > > + tb_valid = 1; > > + > > + while (tb_valid) > > + barrier(); > > + > > + mpc85xx_timebase_freeze(0); > > + > > + local_irq_restore(flags); > > +} > > + > > +static void mpc85xx_take_timebase(void) > > +{ > > + unsigned long flags; > > + > > + local_irq_save(flags); > > + > > + tb_req = 1; > > + while (!tb_valid) > > + barrier(); > > + > > + set_tb(timebase >> 32, timebase & 0xffffffff); > > + mb(); > > + tb_valid = 0; > > + > > + local_irq_restore(flags); > > +} > > + > > atomic_t kexec_down_cpus = ATOMIC_INIT(0); > > > > void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) > > @@ -228,6 +286,20 @@ smp_85xx_setup_cpu(int cpu_nr) > > doorbell_setup_this_cpu(); > > } > > > > +#ifdef CONFIG_KEXEC > > +static const struct of_device_id guts_ids[] = { > > + { .compatible = "fsl,mpc8572-guts", }, > > + { .compatible = "fsl,mpc8560-guts", }, > > + { .compatible = "fsl,mpc8536-guts", }, > > + { .compatible = "fsl,p1020-guts", }, > > + { .compatible = "fsl,p1021-guts", }, > > + { .compatible = "fsl,p1022-guts", }, > > + { .compatible = "fsl,p1023-guts", }, > > + { .compatible = "fsl,p2020-guts", }, > > + { .compat
Re: ppc/sata-fsl: orphan config value: CONFIG_MPC8315_DS
On 05/26/2012 01:53 AM, Anthony Foiani wrote: > Li Yang-R58472 writes: > >> Thanks for bringing [CONFIG_MPC8315_DS] up again. Looks like we do >> have a problem here. > > My impression is that the simplest fix is Adrian's patch, which simply > keys off CONFIG_MPC831x_RDB. It's not very satisfying, but I'll take > "working" vs. "rare lockups at boot". CONFIG_MPC831x_RDB doesn't mean that you're running on such a board, only that the kernel supports those boards. It should be a runtime test. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: Clear RI and EE at the same time in system call exit
On 05/29/2012 06:20 AM, Anton Blanchard wrote: > > In system call exit we currently clear RI and EE separately. An > mtmsrd is a slow operation and we can save cycles by doing it all > in one go. > > This does complicate things a bit - we have to be careful to restore > RI if we branch out before returning to userspace. > > On a POWER7 with virtual cputime disabled this patch improves the > null system call by 7%. > > Signed-off-by: Anton Blanchard > --- > > Index: linux-build/arch/powerpc/kernel/entry_64.S > === > --- linux-build.orig/arch/powerpc/kernel/entry_64.S 2012-05-28 > 18:23:33.374451416 +1000 > +++ linux-build/arch/powerpc/kernel/entry_64.S2012-05-29 > 21:18:22.280934940 +1000 > @@ -197,7 +197,16 @@ syscall_exit: > wrteei 0 > #else > ld r10,PACAKMSR(r13) > - mtmsrd r10,1 > + /* > + * For performance reasons we clear RI the same time that we > + * clear EE. We only need to clear RI just before we restore r13 > + * below, but batching it with EE saves us one expensive mtmsrd call. > + * We have to be careful to restore RI if we branch anywhere from > + * here (eg syscall_exit_work). > + */ > + li r9,MSR_RI > + andcr11,r10,r9 > + mtmsrd r11,1 > #endif /* CONFIG_PPC_BOOK3E */ > > ld r9,TI_FLAGS(r12) > @@ -214,17 +223,6 @@ BEGIN_FTR_SECTION > END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) > andi. r6,r8,MSR_PR > ld r4,_LINK(r1) > - /* > - * Clear RI before restoring r13. If we are returning to > - * userspace and we take an exception after restoring r13, > - * we end up corrupting the userspace r13 value. 
> - */ > -#ifdef CONFIG_PPC_BOOK3S > - /* No MSR:RI on BookE */ > - li r12,MSR_RI > - andcr11,r10,r12 > - mtmsrd r11,1 /* clear MSR.RI */ > -#endif /* CONFIG_PPC_BOOK3S */ > > beq-1f > ACCOUNT_CPU_USER_EXIT(r11, r12) > @@ -271,6 +269,7 @@ syscall_enosys: > b syscall_exit > > syscall_exit_work: > + mtmsrd r10,1 /* Restore RI */ That mtmsrd needs an #ifdef CONFIG_PPC_BOOK3S -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
Bob Cochran wrote on 2012/05/29 20:13:21: > > On 05/29/2012 08:00 AM, Joakim Tjernlund wrote: > > > > I cannot make simple break points using BDI2000 work in 3.3, abatro > > suggests that it > > depends on MSR[DE] is cleared by the kernel. With the emulator I can see > > that > > MSR[DE] is off quite often by just stopping at random times and looking at > > MSR so > > it seems like the kernel is turning MSR[DE] off most of the time. > > Anyone else having success debugging 3.3 with BDI2000? > > > > This is on a P2010(E500/BOOKE) CPU. > > > > ___ > > Linuxppc-dev mailing list > > Linuxppc-dev@lists.ozlabs.org > > https://lists.ozlabs.org/listinfo/linuxppc-dev > > > > I debug using Freescale CodeWarrior and a USB tap, which also rely on > MSR[DE] being set. I develop from the mainline & have a patch set that > I just recently re-tweaked to support kernel debugging. > > If you want, I'll send you my set of patches for the kernel. They might > be useful (not sure since I don't use BDI). Thanks, that could be useful; however, I just figured something out. Changing diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 500fe1d..0cb259b 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -37,7 +37,7 @@ #define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE) #define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE) #else -#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_CE) +#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_CE|MSR_DE) #define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE) #endif made it work (possibly one should change MSR_USER too?). The question now is why MSR_DE is not on by default, especially since the BDI2000 is supported by the kernel (CONFIG_BDI_SWITCH=y is on in my kernel)? Jocke ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
On 05/29/2012 08:00 AM, Joakim Tjernlund wrote: I cannot make simple break points using BDI2000 work in 3.3, abatro suggests that it depends on MSR[DE] is cleared by the kernel. With the emulator I can see that MSR[DE] is off quite often by just stopping at random times and looking at MSR so it seems like the kernel is turning MSR[DE] off most of the time. Anyone else having success debugging 3.3 with BDI2000? This is on a P2010(E500/BOOKE) CPU. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev I debug using Freescale CodeWarrior and a USB tap, which also rely on MSR[DE] being set. I develop from the mainline & have a patch set that I just recently re-tweaked to support kernel debugging. If you want, I'll send you my set of patches for the kernel. They might be useful (not sure since I don't use BDI). ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: MSR loses DE bit in 3.3, BDI2000 cant handle breakpoints?
Dear Bob, In message <4fc511c1.4050...@mindchasers.com> you wrote: > > I debug using Freescale CodeWarrior and a USB tap, which also rely on > MSR[DE] being set. I develop from the mainline & have a patch set that > I just recently re-tweaked to support kernel debugging. > > If you want, I'll send you my set of patches for the kernel. They might > be useful (not sure since I don't use BDI). Please rather submit as a patch on the mailing list. Thanks. Best regards, Wolfgang Denk -- DENX Software Engineering GmbH, MD: Wolfgang Denk & Detlev Zundel HRB 165235 Munich, Office: Kirchenstr.5, D-82194 Groebenzell, Germany Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: w...@denx.de egrep patterns are full regular expressions; it uses a fast determi- nistic algorithm that sometimes needs exponential space. - unix manuals ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: ppc/sata-fsl: orphan config value: CONFIG_MPC8315_DS
Scott Wood writes: > CONFIG_MPC831x_RDB doesn't mean that you're running on such a board, > only that the kernel supports those boards. It should be a runtime > test. Point taken. If that SATA check is CPU/SOC-based, then it should be easy enough to test. The cpuinfo for my board is: # cat /proc/cpuinfo processor : 0 cpu : e300c3 clock : 266.64MHz revision: 2.0 (pvr 8085 0020) bogomips: 66.66 timebase: On the other hand, if the problem is actually caused by board trace routing (or other hardware that's outside the control of the CPU/SOC), then I don't know how possible a runtime check will be. Do you know if there is a specific errata that the MPC8315_DS ran across that required this fix, or was it a band-aid in the first place? Either way, thanks for looking into this. Thanks, Tony ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc: Clear RI and EE at the same time in system call exit
Hi Scott, > > syscall_exit_work: > > + mtmsrd r10,1 /* Restore RI */ > > That mtmsrd needs an #ifdef CONFIG_PPC_BOOK3S Thanks! Anton -- In system call exit we currently clear RI and EE separately. An mtmsrd is a slow operation and we can save cycles by doing it all in one go. This does complicate things a bit - we have to be careful to restore RI if we branch out before returning to userspace. On a POWER7 with virtual cputime disabled this patch improves the null system call by 7%. Signed-off-by: Anton Blanchard --- Index: linux-build/arch/powerpc/kernel/entry_64.S === --- linux-build.orig/arch/powerpc/kernel/entry_64.S 2012-05-29 21:22:40.293549055 +1000 +++ linux-build/arch/powerpc/kernel/entry_64.S 2012-05-30 07:47:31.380737406 +1000 @@ -197,7 +197,16 @@ syscall_exit: wrteei 0 #else ld r10,PACAKMSR(r13) - mtmsrd r10,1 + /* +* For performance reasons we clear RI the same time that we +* clear EE. We only need to clear RI just before we restore r13 +* below, but batching it with EE saves us one expensive mtmsrd call. +* We have to be careful to restore RI if we branch anywhere from +* here (eg syscall_exit_work). +*/ + li r9,MSR_RI + andcr11,r10,r9 + mtmsrd r11,1 #endif /* CONFIG_PPC_BOOK3E */ ld r9,TI_FLAGS(r12) @@ -214,17 +223,6 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. r6,r8,MSR_PR ld r4,_LINK(r1) - /* -* Clear RI before restoring r13. If we are returning to -* userspace and we take an exception after restoring r13, -* we end up corrupting the userspace r13 value. -*/ -#ifdef CONFIG_PPC_BOOK3S - /* No MSR:RI on BookE */ - li r12,MSR_RI - andcr11,r10,r12 - mtmsrd r11,1 /* clear MSR.RI */ -#endif /* CONFIG_PPC_BOOK3S */ beq-1f ACCOUNT_CPU_USER_EXIT(r11, r12) @@ -271,6 +269,9 @@ syscall_enosys: b syscall_exit syscall_exit_work: +#ifdef CONFIG_PPC_BOOK3S + mtmsrd r10,1 /* Restore RI */ +#endif /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr. If TIF_NOERROR is set, just save r3 as it is. 
*/ ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: ppc/sata-fsl: orphan config value: CONFIG_MPC8315_DS
On 05/29/2012 05:07 PM, Anthony Foiani wrote: > Scott Wood writes: > >> CONFIG_MPC831x_RDB doesn't mean that you're running on such a board, >> only that the kernel supports those boards. It should be a runtime >> test. > > Point taken. > > If that SATA check is CPU/SOC-based, then it should be easy enough to > test. The cpuinfo for my board is: > > # cat /proc/cpuinfo > processor : 0 > cpu : e300c3 > clock : 266.64MHz > revision: 2.0 (pvr 8085 0020) > bogomips: 66.66 > timebase: > > On the other hand, if the problem is actually caused by board trace > routing (or other hardware that's outside the control of the CPU/SOC), > then I don't know how possible a runtime check will be. Board information is available from the device tree, and from platform code that was selected based on the device tree. > Do you know if there is a specific errata that the MPC8315_DS ran > across that required this fix, or was it a band-aid in the first > place? I don't know the history of this, sorry. It looks like Yang Li added this code -- Yang, can you answer this? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/2] powerpc: Rename copyuser_power7_vmx.c to vmx-helper.c
Subsequent patches will add more VMX library functions and it makes sense to keep all the c-code helper functions in the one file. Signed-off-by: Anton Blanchard --- Index: linux-build/arch/powerpc/lib/Makefile === --- linux-build.orig/arch/powerpc/lib/Makefile 2012-05-30 09:39:59.084233436 +1000 +++ linux-build/arch/powerpc/lib/Makefile 2012-05-30 10:22:32.565764322 +1000 @@ -24,7 +24,7 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sste ifeq ($(CONFIG_PPC64),y) obj-$(CONFIG_SMP) += locks.o -obj-$(CONFIG_ALTIVEC) += copyuser_power7_vmx.o +obj-$(CONFIG_ALTIVEC) += vmx-helper.o endif obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o Index: linux-build/arch/powerpc/lib/vmx-helper.c === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-build/arch/powerpc/lib/vmx-helper.c 2012-05-30 10:22:32.577764541 +1000 @@ -0,0 +1,51 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2011 + * + * Authors: Sukadev Bhattiprolu + * Anton Blanchard + */ +#include +#include +#include + +int enter_vmx_usercopy(void) +{ + if (in_interrupt()) + return 0; + + /* This acts as preempt_disable() as well and will make +* enable_kernel_altivec(). We need to disable page faults +* as they can call schedule and thus make us lose the VMX +* context. 
So on page faults, we just fail which will cause +* a fallback to the normal non-vmx copy. +*/ + pagefault_disable(); + + enable_kernel_altivec(); + + return 1; +} + +/* + * This function must return 0 because we tail call optimise when calling + * from __copy_tofrom_user_power7 which returns 0 on success. + */ +int exit_vmx_usercopy(void) +{ + pagefault_enable(); + return 0; +} Index: linux-build/arch/powerpc/lib/copyuser_power7_vmx.c === --- linux-build.orig/arch/powerpc/lib/copyuser_power7_vmx.c 2012-05-28 17:18:38.213091662 +1000 +++ /dev/null 1970-01-01 00:00:00.0 + @@ -1,51 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2011 - * - * Authors: Sukadev Bhattiprolu - * Anton Blanchard - */ -#include -#include -#include - -int enter_vmx_copy(void) -{ - if (in_interrupt()) - return 0; - - /* This acts as preempt_disable() as well and will make -* enable_kernel_altivec(). We need to disable page faults -* as they can call schedule and thus make us lose the VMX -* context. So on page faults, we just fail which will cause -* a fallback to the normal non-vmx copy. 
-*/ - pagefault_disable(); - - enable_kernel_altivec(); - - return 1; -} - -/* - * This function must return 0 because we tail call optimise when calling - * from __copy_tofrom_user_power7 which returns 0 on success. - */ -int exit_vmx_copy(void) -{ - pagefault_enable(); - return 0; -} Index: linux-build/arch/powerpc/lib/copyuser_power7.S === --- linux-build.orig/arch/powerpc/lib/copyuser_power7.S 2012-05-29 21:22:43.725611809 +1000 +++ linux-build/arch/powerpc/lib/copyuser_power7.S 2012-05-30 10:23:29.198797007 +1000 @@ -61,7 +61,7 @@ ld r15,STK_REG(r15)(r1) ld r14,STK_REG(r14)(r1) .Ldo_err3: - bl .exit_vmx_copy + bl .exit_vmx_usercopy
[PATCH 2/2] powerpc: POWER7 optimised copy_page using VMX and enhanced prefetch
Implement a POWER7 optimised copy_page using VMX and enhanced prefetch instructions. We use enhanced prefetch hints to prefetch both the load and store side. We copy a cacheline at a time and fall back to regular loads and stores if we are unable to use VMX (eg we are in an interrupt). The following microbenchmark was used to assess the impact of the patch: http://ozlabs.org/~anton/junkcode/page_fault_file.c We test MAP_PRIVATE page faults across a 1GB file, 100 times: # time ./page_fault_file -p -l 1G -i 100 Before: 22.25s After: 18.89s 17% faster Signed-off-by: Anton Blanchard --- Index: linux-build/arch/powerpc/lib/copypage_power7.S === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-build/arch/powerpc/lib/copypage_power7.S 2012-05-30 14:20:32.457035092 +1000 @@ -0,0 +1,168 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2012 + * + * Author: Anton Blanchard + */ +#include +#include + +#define STACKFRAMESIZE 256 +#define STK_REG(i) (112 + ((i)-14)*8) + +_GLOBAL(copypage_power7) + /* +* We prefetch both the source and destination using enhanced touch +* instructions. We use a stream ID of 0 for the load side and +* 1 for the store side. Since source and destination are page +* aligned we don't need to clear the bottom 7 bits of either +* address. 
+*/ + ori r9,r3,1 /* stream=1 */ + +#ifdef CONFIG_PPC_64K_PAGES + lis r7,0x0E01 /* depth=7, units=512 */ +#else + lis r7,0x0E00 /* depth=7 */ + ori r7,r7,0x1000 /* units=32 */ +#endif + ori r10,r7,1 /* stream=1 */ + + lis r8,0x8000 /* GO=1 */ + clrldi r8,r8,32 + +.machine push +.machine "power4" + dcbt r0,r4,0b01000 + dcbt r0,r7,0b01010 + dcbtst r0,r9,0b01000 + dcbtst r0,r10,0b01010 + eieio + dcbt r0,r8,0b01010 /* GO */ +.machine pop + +#ifdef CONFIG_ALTIVEC + mflr r0 + std r3,48(r1) + std r4,56(r1) + std r0,16(r1) + stdu r1,-STACKFRAMESIZE(r1) + bl .enter_vmx_copy + cmpwi r3,0 + ld r0,STACKFRAMESIZE+16(r1) + ld r3,STACKFRAMESIZE+48(r1) + ld r4,STACKFRAMESIZE+56(r1) + mtlr r0 + + li r0,(PAGE_SIZE/128) + mtctr r0 + + beq .Lnonvmx_copy + + addi r1,r1,STACKFRAMESIZE + + li r6,16 + li r7,32 + li r8,48 + li r9,64 + li r10,80 + li r11,96 + li r12,112 + + .align 5 +1: lvx vr7,r0,r4 + lvx vr6,r4,r6 + lvx vr5,r4,r7 + lvx vr4,r4,r8 + lvx vr3,r4,r9 + lvx vr2,r4,r10 + lvx vr1,r4,r11 + lvx vr0,r4,r12 + addi r4,r4,128 + stvx vr7,r0,r3 + stvx vr6,r3,r6 + stvx vr5,r3,r7 + stvx vr4,r3,r8 + stvx vr3,r3,r9 + stvx vr2,r3,r10 + stvx vr1,r3,r11 + stvx vr0,r3,r12 + addi r3,r3,128 + bdnz 1b + + b .exit_vmx_copy /* tail call optimise */ + +#else + li r0,(PAGE_SIZE/128) + mtctr r0 + + stdu r1,-STACKFRAMESIZE(r1) +#endif + +.Lnonvmx_copy: + std r14,STK_REG(r14)(r1) + std r15,STK_REG(r15)(r1) + std r16,STK_REG(r16)(r1) + std r17,STK_REG(r17)(r1) + std r18,STK_REG(r18)(r1) + std r19,STK_REG(r19)(r1) + std r20,STK_REG(r20)(r1) + +1: ld r0,0(r4) + ld r5,8(r4) + ld r6,16(r4) + ld r7,24(r4) + ld r8,32(r4) + ld r9,40(r4) + ld r10,48(r4) + ld r11,56(r4) + ld r12,64(r4) + ld r14,72(r4) + ld r15,80(r4) + ld r16,88(r4) + ld r17,96(r4) + ld r18,104(r4) + ld r19,112(r4) + ld r20,120(r4) + addi r4,r4,128 + std r0,0(r3) + std r5,8(r3) + std r6,16(r3) + std r7,24(r3) + std r8,32(r3) + std r9,40(r3) + std r10,48(r3) + std