Re: KGDB panics on p2020 target
On 01/17/2014 03:52 PM, Arun Chandran wrote: Hi, I am testing kgdb on freescale p2020 target. In target 1) root@freescale-p2020ds:~# uname -a Linux freescale-p2020ds 3.10.20-rt14+ #9 SMP Thu Jan 16 16:32:15 IST 2014 ppc GNU/Linux 2) root@freescale-p2020ds:~# cat /proc/cpuinfo processor : 0 cpu : e500v2 clock : 999.990008MHz revision: 4.0 (pvr 8021 1040) bogomips: 124.99 processor : 1 cpu : e500v2 clock : 999.990008MHz revision: 4.0 (pvr 8021 1040) bogomips: 124.99 total bogomips : 249.99 timebase: 62499376 platform: P2020 DS model : fsl,P2020DS Memory : 768 MB 3) freescale-p2020ds:~# echo ttyS1,115200 /sys/module/kgdboc/parameters/kgdoc 4) I set up host (settings given below); Then I send SysRq : DEBUG In host -- (gdb) target remote /dev/ttyS0 Remote debugging using /dev/ttyS0 kgdb_breakpoint () at kernel/debug/debug_core.c:1013 1013 arch_kgdb_breakpoint(); (gdb) b sys_sync Breakpoint 1 at 0xc0167288: file fs/sync.c, line 103. (gdb) c Continuing. I am able to take control in host; after that I am setting breakpoint at sys_sync In target root@freescale-p2020ds:~# for i in 1 2 3 4 5 6 7 8 9 do sync done In host -- Breakpoint 1, sys_sync () at fs/sync.c:103 103 { (gdb) c Continuing. Breakpoint is hit only one time instead of 9 times; after that target hangs. I recommend you try upstream to take a further look at this, instead of that Freescale distribution. As I recall currently KGDB works well in 85xx case in ML. Then i tried to send SysRq : DEBUG in target kernel panics. I have pasted the panic below. # SysRq : DEBUG Kernel panic - not syncing: Recursive entry to debugger The kernel already trap into kgdb_handle_exception() with the debug exception while triggering that break point, but again you trigger another debug exception by SysRq. Actually KGDB can't handle such this recursive behavior, so KGDB always call kgdb_reenter_check() to prevent this scenario with this call trace. static int kgdb_reenter_check(struct kgdb_state *ks) { unsigned long addr; if (atomic_read(kgdb_active) != raw_smp_processor_id()) return 0; ... if (exception_level 1) { dump_stack(); panic(Recursive entry to debugger); } Tiejun CPU: 1 PID: 2266 Comm: cron Not tainted 3.10.20-rt14+ #6 Call Trace: [effe5d10] [c0008060] show_stack+0x4c/0x168 (unreliable) [effe5d50] [c0588878] panic+0xe4/0x224 [effe5da0] [c00b2cbc] kgdb_handle_exception+0x1d4/0x1f8 [effe5df0] [c0010038] kgdb_handle_breakpoint+0x4c/0x80 [effe5e00] [c057e7e0] program_check_exception+0x10c/0x264 [effe5e10] [c000f660] ret_from_except_full+0x0/0x4c --- Exception: 700 at sysrq_handle_dbg+0x3c/0xc8 LR = __handle_sysrq+0x154/0x1cc [effe5ed0] [c033df5c] __handle_sysrq+0x140/0x1cc (unreliable) [effe5f00] [c0353ef8] serial8250_rx_chars+0xe8/0x218 [effe5f30] [c0359644] fsl8250_handle_irq+0xac/0x174 [effe5f50] [c0352f9c] serial8250_interrupt+0x40/0xe8 [effe5f70] [c00b5500] handle_irq_event_percpu+0xcc/0x2a8 [effe5fc0] [c00b5720] handle_irq_event+0x44/0x74 [effe5fe0] [c00b8e14] handle_fasteoi_irq+0xd0/0x17c [effe5ff0] [c000d58c] call_handle_irq+0x18/0x28 [c4f91b10] [c0004f60] do_IRQ+0x150/0x224 [c4f91b40] [c000f6ac] ret_from_except+0x0/0x18 --- Exception: 501 at rpcauth_lookup_credcache+0x138/0x2a4 LR = rpcauth_lookup_credcache+0xb8/0x2a4 [c4f91c00] [24002424] 0x24002424 (unreliable) [c4f91c50] [c055cb84] rpcauth_lookupcred+0x64/0xac [c4f91c80] [c055ce2c] rpcauth_refreshcred+0x11c/0x124 [c4f91cc0] [c055ac80] __rpc_execute+0x8c/0x330 [c4f91d10] [c05540b8] rpc_run_task+0x9c/0xc4 [c4f91d20] [c0554204] rpc_call_sync+0x50/0xb8 [c4f91d50] [c0257164] nfs_proc_getattr+0x48/0x5c [c4f91d70] [c024aaa4] __nfs_revalidate_inode+0xa8/0x168 [c4f91d90] [c024ac1c] nfs_revalidate_mapping+0xb8/0x194 [c4f91da0] [c0251f00] nfs_follow_link+0x24/0xc8 [c4f91dc0] [c0145280] path_lookupat+0x2f4/0x824 [c4f91e10] [c01457dc] filename_lookup.isra.33+0x2c/0x8c [c4f91e30] [c0147a74] user_path_at_empty+0x58/0x9c [c4f91eb0] [c013d5bc] vfs_fstatat+0x54/0xb4 [c4f91ee0] [c013d93c] SyS_stat64+0x1c/0x44 [c4f91f40] [c000eec0] ret_from_syscall+0x0/0x3c --- Exception: c01 at 0xff08a98 LR = 0xfed53e8 CPU: 1 PID: 2266 Comm: cron Not tainted 3.10.20-rt14+ #6 Call Trace: [effe5bb0] [c0008060] show_stack+0x4c/0x168 (unreliable) [effe5bf0] [c00b2cac] kgdb_handle_exception+0x1c4/0x1f8 [effe5c40] [c0010038] kgdb_handle_breakpoint+0x4c/0x80 [effe5c50] [c057e7e0] program_check_exception+0x10c/0x264 [effe5c60] [c000f660] ret_from_except_full+0x0/0x4c --- Exception: 700 at kgdb_panic_event+0x1c/0x3c LR = notifier_call_chain+0x60/0xb0 [effe5d20] [](nil) (unreliable) [effe5d40] [c05819dc] __atomic_notifier_call_chain+0x14/0x24 [effe5d50] [c05888a8] panic+0x114/0x224
Re: KGDB panics on p2020 target
On 01/21/2014 02:04 PM, Arun Chandran wrote: On Mon, Jan 20, 2014 at 1:55 PM, �tiejun.chen� tiejun.c...@windriver.comwrote: On 01/17/2014 03:52 PM, Arun Chandran wrote: Hi, I am testing kgdb on freescale p2020 target. In target 1) root@freescale-p2020ds:~# uname -a Linux freescale-p2020ds 3.10.20-rt14+ #9 SMP Thu Jan 16 16:32:15 IST 2014 ppc GNU/Linux 2) root@freescale-p2020ds:~# cat /proc/cpuinfo processor : 0 cpu : e500v2 clock : 999.990008MHz revision: 4.0 (pvr 8021 1040) bogomips: 124.99 processor : 1 cpu : e500v2 clock : 999.990008MHz revision: 4.0 (pvr 8021 1040) bogomips: 124.99 total bogomips : 249.99 timebase: 62499376 platform: P2020 DS model : fsl,P2020DS Memory : 768 MB 3) freescale-p2020ds:~# echo ttyS1,115200 /sys/module/kgdboc/parameters/kgdoc 4) I set up host (settings given below); Then I send SysRq : DEBUG In host -- (gdb) target remote /dev/ttyS0 Remote debugging using /dev/ttyS0 kgdb_breakpoint () at kernel/debug/debug_core.c:1013 1013 arch_kgdb_breakpoint(); (gdb) b sys_sync Breakpoint 1 at 0xc0167288: file fs/sync.c, line 103. (gdb) c Continuing. I am able to take control in host; after that I am setting breakpoint at sys_sync In target root@freescale-p2020ds:~# for i in 1 2 3 4 5 6 7 8 9 do sync done In host -- Breakpoint 1, sys_sync () at fs/sync.c:103 103 { (gdb) c Continuing. Breakpoint is hit only one time instead of 9 times; after that target hangs. I recommend you try upstream to take a further look at this, instead of that Freescale distribution. As I recall currently KGDB works well in 85xx case in ML. Many thanks for your suggestion. I tested the same thing on linux-3.12.y ( https://git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/log/?id=refs%2Ftags%2Fv3.12.8h=linux-3.12.y ). Please go powerpc tree, git clone git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git git checkout next root@p2020ds:~# uname -a Linux freescale-p2020ds 3.12.8 #136 SMP Tue Jan 21 11:14:13 IST 2014 ppc GNU/Linux The breakpoint I placed at sys_sync is hit only once. After that my gdb command continue does not make further hits. And the target is hung. It is not even responding to my further sending of SysRq : DEBUG I have attached my .config Then i tried to send SysRq : DEBUG in target kernel panics. I have pasted the panic below. # SysRq : DEBUG Kernel panic - not syncing: Recursive entry to debugger The kernel already trap into kgdb_handle_exception() with the debug exception while triggering that break point, but again you trigger another debug exception by SysRq. Actually KGDB can't handle such this recursive behavior, so KGDB always call kgdb_reenter_check() to prevent this scenario with this call trace. I am doing it because the target is hung after the initial breakpoint is hit. This is just what I mean. Although looks your kernel is hung, actually your kernel is already in kgdb exception handler previously. So after you send break by SysRq to debug, its not allowed by current kgdb scheme as I said. The following call trace also show this path explicitly. Tiejun static int kgdb_reenter_check(struct kgdb_state *ks) { unsigned long addr; if (atomic_read(kgdb_active) != raw_smp_processor_id()) return 0; ... if (exception_level 1) { dump_stack(); panic(Recursive entry to debugger); } Tiejun CPU: 1 PID: 2266 Comm: cron Not tainted 3.10.20-rt14+ #6 Call Trace: [effe5d10] [c0008060] show_stack+0x4c/0x168 (unreliable) [effe5d50] [c0588878] panic+0xe4/0x224 [effe5da0] [c00b2cbc] kgdb_handle_exception+0x1d4/0x1f8 [effe5df0] [c0010038] kgdb_handle_breakpoint+0x4c/0x80 [effe5e00] [c057e7e0] program_check_exception+0x10c/0x264 [effe5e10] [c000f660] ret_from_except_full+0x0/0x4c --- Exception: 700 at sysrq_handle_dbg+0x3c/0xc8 LR = __handle_sysrq+0x154/0x1cc [effe5ed0] [c033df5c] __handle_sysrq+0x140/0x1cc (unreliable) [effe5f00] [c0353ef8] serial8250_rx_chars+0xe8/0x218 [effe5f30] [c0359644] fsl8250_handle_irq+0xac/0x174 [effe5f50] [c0352f9c] serial8250_interrupt+0x40/0xe8 [effe5f70] [c00b5500] handle_irq_event_percpu+0xcc/0x2a8 [effe5fc0] [c00b5720] handle_irq_event+0x44/0x74 [effe5fe0] [c00b8e14] handle_fasteoi_irq+0xd0/0x17c [effe5ff0] [c000d58c] call_handle_irq+0x18/0x28 [c4f91b10] [c0004f60] do_IRQ+0x150/0x224 [c4f91b40] [c000f6ac] ret_from_except+0x0/0x18 --- Exception: 501 at rpcauth_lookup_credcache+0x138/0x2a4 LR = rpcauth_lookup_credcache+0xb8/0x2a4 [c4f91c00] [24002424] 0x24002424 (unreliable) [c4f91c50] [c055cb84] rpcauth_lookupcred+0x64/0xac [c4f91c80] [c055ce2c] rpcauth_refreshcred+0x11c/0x124 [c4f91cc0] [c055ac80] __rpc_execute+0x8c/0x330 [c4f91d10] [c05540b8] rpc_run_task+0x9c/0xc4
Re: powerpc/hugetlb: BUG: using smp_processor_id() in preemptible
On 01/17/2014 05:23 PM, Nikita Yushchenko wrote: Hi While running LTP hugeltb tests on freescale powerpc board, I'm getting [ 7253.637591] BUG: using smp_processor_id() in preemptible [ ] code: hugemmap01/9048 [ 7253.637601] caller is free_hugepd_range.constprop.25+0x88/0x1a8 [ 7253.637605] CPU: 1 PID: 9048 Comm: hugemmap01 Not tainted 3.10.20-rt14+ #114 [ 7253.637606] Call Trace: [ 7253.637617] [cb049d80] [c0007ea4] show_stack+0x4c/0x168 (unreliable) [ 7253.637624] [cb049dc0] [c031c674] debug_smp_processor_id+0x114/0x134 [ 7253.637628] [cb049de0] [c0016d28] free_hugepd_range.constprop.25+0x88/0x1a8 [ 7253.637632] [cb049e00] [c001711c] hugetlb_free_pgd_range+0x6c/0x168 [ 7253.637639] [cb049e40] [c0117408] free_pgtables+0x12c/0x150 [ 7253.637646] [cb049e70] [c011ce38] unmap_region+0xa0/0x11c [ 7253.637671] [cb049ef0] [c011f03c] do_munmap+0x224/0x3bc [ 7253.637676] [cb049f20] [c011f2e0] vm_munmap+0x38/0x5c [ 7253.637682] [cb049f40] [c000ef88] ret_from_syscall+0x0/0x3c [ 7253.637686] --- Exception: c01 at 0xff16004 This is on 3.10 based kernel but looks like code in question did not change since then. Immediate reason of this dump is usage of smp_processor_id() in hugepd_free(), which is executed in preemptible context on this path. Perhaps need to add preempt_disable() / preempt_enable() somewhere. But what is the proper location for these? Could you try this? powerpc/hugetlb: replace __get_cpu_var with get_cpu_var Replace __get_cpu_var safely with get_cpu_var to avoid the following call trace: [ 7253.637591] BUG: using smp_processor_id() in preemptible [ ] code: hugemmap01/9048 [ 7253.637601] caller is free_hugepd_range.constprop.25+0x88/0x1a8 [ 7253.637605] CPU: 1 PID: 9048 Comm: hugemmap01 Not tainted 3.10.20-rt14+ #114 [ 7253.637606] Call Trace: [ 7253.637617] [cb049d80] [c0007ea4] show_stack+0x4c/0x168 (unreliable) [ 7253.637624] [cb049dc0] [c031c674] debug_smp_processor_id+0x114/0x134 [ 7253.637628] [cb049de0] [c0016d28] free_hugepd_range.constprop.25+0x88/0x1a8 [ 7253.637632] [cb049e00] [c001711c] hugetlb_free_pgd_range+0x6c/0x168 [ 7253.637639] [cb049e40] [c0117408] free_pgtables+0x12c/0x150 [ 7253.637646] [cb049e70] [c011ce38] unmap_region+0xa0/0x11c [ 7253.637671] [cb049ef0] [c011f03c] do_munmap+0x224/0x3bc [ 7253.637676] [cb049f20] [c011f2e0] vm_munmap+0x38/0x5c [ 7253.637682] [cb049f40] [c000ef88] ret_from_syscall+0x0/0x3c [ 7253.637686] --- Exception: c01 at 0xff16004 Signed-off-by: Tiejun Chentiejun.c...@windriver.com --- arch/powerpc/mm/hugetlbpage.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index fb05b12..42779c0 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -400,12 +400,13 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) { struct hugepd_freelist **batchp; - batchp = __get_cpu_var(hugepd_freelist_cur); + batchp = get_cpu_var(hugepd_freelist_cur); if (atomic_read(tlb-mm-mm_users) 2 || cpumask_equal(mm_cpumask(tlb-mm), cpumask_of(smp_processor_id( { kmem_cache_free(hugepte_cache, hugepte); +put_cpu_var(hugepd_freelist_cur); return; } @@ -419,6 +420,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) call_rcu_sched((*batchp)-rcu, hugepd_free_rcu_callback); *batchp = NULL; } +put_cpu_var(hugepd_freelist_cur); } #endif -- 1.7.9.5 Tiejun Nikita ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC][PATCH] powerpc/CoreNet64: compile with CONFIG_E{5, 6}500_CPU well
On 11/21/2013 12:41 AM, Scott Wood wrote: On Wed, 2013-11-20 at 16:35 +0800, Tiejun Chen wrote: CONFIG_ALTIVEC is always enabled for CoreNet64. In the defconfig perhaps, but this isn't a generally true statement. Yes, but I think we should avoid this probable scenario :) And if we select CONFIG_E{5,6}500_CPU this may introduce -mcpu=e500mc64 into $CFLAGS. But Altivec and Spe options not allowed with e500mc64, so : Sigh. CC arch/powerpc/lib/xor_vmx.o arch/powerpc/lib/xor_vmx.c:1:0: error: AltiVec not supported in this target make[1]: *** [arch/powerpc/lib/xor_vmx.o] Error 1 make: *** [arch/powerpc/lib] Error 2 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/lib/Makefile |3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 95a20e1..641a77d 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -40,5 +40,8 @@ obj-y += code-patching.o obj-y += feature-fixups.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o +# Altivec and Spe options not allowed with e500mc64 in GCC. +ifeq ($(call cc-option-yn,-mcpu=e500mc64),n) obj-$(CONFIG_ALTIVEC) += xor_vmx.o CFLAGS_xor_vmx.o += -maltivec -mabi=altivec +endif This does not seem like the right fix. What if GCC supports both -mcpu=e500mc64 and -mcpu=e6500, and we're using the latter? Or for that I can understand what you mean, but in current kernel, -mcpu=e500mc64 should be excluded from -mcpu=e6500, arch/powerpc/Makefile: E5500_CPU := $(call cc-option,-mcpu=e500mc64,-mcpu=powerpc64) CFLAGS-$(CONFIG_E5500_CPU) += $(E5500_CPU) CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU)) But unfortunately, another place also use the same option, lib/raid6/Makefile: raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o ... ifeq ($(CONFIG_ALTIVEC),y) altivec_flags := -maltivec -mabi=altivec endif Looks we have to do something in this common Makefile file as well, but it may be a bit ugly if still judge some cpu-specific flags... So what about this version? diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 607acf5..872a85c 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -127,7 +127,12 @@ CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5) CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6) CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7) +# Altivec and Spe options not allowed with e500mc64 in GCC. +ifeq ($(CONFIG_ALTIVEC),) E5500_CPU := $(call cc-option,-mcpu=e500mc64,-mcpu=powerpc64) +else +E5500_CPU := -mcpu=powerpc64 +endif CFLAGS-$(CONFIG_E5500_CPU) += $(E5500_CPU) CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU)) Tiejun matter, if we're using -mcpu=whatever-ibm-chip-has-this? Plus, wouldn't you need to do something to prevent code in that file from being called? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v6][PATCH 0/5] powerpc/book3e: powerpc/book3e: make kgdb to work well
On 10/23/2013 05:31 PM, Tiejun Chen wrote: Scott, Tested on fsl-p5040 DS. Scott, Any comments to this version? Tiejun v6: * rebase * change the C code to initialize the exception stack addresses in the PACA instead. * Clear the PACA_IRQ_HARD_DIS force to exit directly from this debug exception without replaying interrupt. * so drop book3e/kgdb: update thread's dbcr0. v5: * rebase on merge branch. Note the original patch, [ATCH 5/7] kgdb/kgdbts: support ppc64, is already merged by Jason. v4: * use DEFINE_PER_CPU to allocate kgdb's thread_info * add patch 7 to make usre copy thread_info only !__check_irq_replay * leave andi. r14,r11,MSR_PR out of #ifndef CONFIG_KGDB since cr0 is still used lately. * retest v3: * make work when enable CONFIG_RELOCATABLE * fix one typo in patch, powerpc/book3e: store critical/machine/debug exception thread info: ld r1,PACAKSAVE(r13); - ld r14,PACAKSAVE(r13); * remove copying the thread_info since booke and book3e always copy the thead_info now when we enter the debug exception, and so drop the v2 patch, book3e/kgdb: Fix a single stgep case of lazy IRQ v2: * Make sure we cover CONFIG_PPC_BOOK3E_64 safely * Use LOAD_REG_IMMEDIATE() to load properly the value of the constant expression in load debug exception stack * Copy thread infor form the kernel stack coming from usr * Rebase latest powerpc git tree v1: * Copy thread info only when we are from !user mode since we'll get kernel stack coming from usr directly. * remove save/restore EX_R14/EX_R15 since DBG_EXCEPTION_PROLOG already covered this. * use CURRENT_THREAD_INFO() conveniently to get thread. * fix some typos * add a patch to make sure gdb can generate a single step properly to invoke a kgdb state. * add a patch to if we need to replay an interrupt, we shouldn't restore that previous backup thread info to make sure we can replay an interrupt lately with a proper thread info. * rebase latest powerpc git tree v0: This patchset is used to support kgdb for book3e. Tiejun Chen (5): powerpc/book3e: initialize crit/mc/dbg kernel stack pointers powerpc/book3e: store crit/mc/dbg exception thread info powerpc/book3e: support kgdb for kernel space powerpc/kgdb: use DEFINE_PER_CPU to allocate kgdb's thread_info powerpc/book3e/kgdb: Fix a single stgep case of lazy IRQ arch/powerpc/kernel/exceptions-64e.S | 26 ++ arch/powerpc/kernel/kgdb.c | 13 ++--- arch/powerpc/kernel/setup_64.c | 18 -- 3 files changed, 44 insertions(+), 13 deletions(-) Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v5][PATCH 2/6] powerpc/book3e: store critical/machine/debug exception thread info
On 10/19/2013 06:43 AM, Scott Wood wrote: On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote: We need to store thread info to these exception thread info like something we already did for PPC32. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/exceptions-64e.S | 15 +++ 1 file changed, 15 insertions(+) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 4d8e57f..07cf657 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -67,6 +67,18 @@ std r10,PACA_##level##_STACK(r13); #endif +/* Store something to exception thread info */ +#defineBOOK3E_STORE_EXC_LEVEL_THEAD_INFO(type) \ + ld r14,PACAKSAVE(r13); \ + CURRENT_THREAD_INFO(r14, r14); \ + CURRENT_THREAD_INFO(r15, r1); \ + ld r10,TI_FLAGS(r14); \ + std r10,TI_FLAGS(r15); \ + ld r10,TI_PREEMPT(r14); \ + std r10,TI_PREEMPT(r15); \ + ld r10,TI_TASK(r14); \ + std r10,TI_TASK(r15); Where is type used? Yes, its noting now but its worth leaving this to extend something in the future. BTW, no need for a BOOK3E prefix for things local to this file. What about EXC_LEVEL_EXCEPTION_PROLOG? Please see next version. Thanks, Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v5][PATCH 1/6] powerpc/book3e: load critical/machine/debug exception stack
On 10/19/2013 06:37 AM, Scott Wood wrote: On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote: We always alloc critical/machine/debug check exceptions. This is different from the normal exception. So we should load these exception stack properly like we did for booke. This is booke. Do you mean like like we did for 32-bit? Yes. And the code is already trying to load the special stack; it just happens that it's loading from a different location than the C code placed the stack addresses. The changelog should point out the specific thing that is being fixed. Here I don't fix anything, and I just want to do the same thing as 32-bit. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/exceptions-64e.S | 49 +++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 4b23119..4d8e57f 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -36,6 +36,37 @@ */ #define SPECIAL_EXC_FRAME_SIZE INT_FRAME_SIZE +/* only on book3e */ +#define DBG_STACK_BASE dbgirq_ctx +#define MC_STACK_BASE mcheckirq_ctx +#define CRIT_STACK_BASEcritirq_ctx + +#ifdef CONFIG_RELOCATABLE +#define LOAD_STACK_BASE(reg, level)\ + tovirt(r2,r2); \ + LOAD_REG_ADDR(reg, level##_STACK_BASE); +#else +#define LOAD_STACK_BASE(reg, level)\ + LOAD_REG_IMMEDIATE(reg, level##_STACK_BASE); +#endif + +#ifdef CONFIG_SMP +#define BOOK3E_LOAD_EXC_LEVEL_STACK(level) \ + mfspr r14,SPRN_PIR; \ + slwir14,r14,3; \ + LOAD_STACK_BASE(r10, level);\ + add r10,r10,r14;\ + ld r10,0(r10); \ + addir10,r10,THREAD_SIZE;\ + std r10,PACA_##level##_STACK(r13); +#else +#define BOOK3E_LOAD_EXC_LEVEL_STACK(level) \ + LOAD_STACK_BASE(r10, level);\ + ld r10,0(r10); \ + addir10,r10,THREAD_SIZE;\ + std r10,PACA_##level##_STACK(r13); +#endif It looks like you're loading the stack from *irq_ctx, storing it in PACA_*_stack, and then (immediately after this in the caller) loading it back from PACA_*_STACK. Why not just load it from *irq_ctx and get rid of PACA_*_STACK altogether -- or change the C code to initialize the addresses in the PACA instead, and get ird of *irq_ctx on 64-bit? Okay, I'd like to move forward the c code, please see next version. /* Exception prolog code for all exceptions */ #define EXCEPTION_PROLOG(n, intnum, type, addition) \ mtspr SPRN_SPRG_##type##_SCRATCH,r13; /* get spare registers */ \ @@ -68,20 +99,32 @@ #define SPRN_GDBELL_SRR1 SPRN_GSRR1 #define CRIT_SET_KSTACK \ + andi. r10,r11,MSR_PR; \ + bne 1f; \ + BOOK3E_LOAD_EXC_LEVEL_STACK(CRIT); \ ld r1,PACA_CRIT_STACK(r13);\ - subir1,r1,SPECIAL_EXC_FRAME_SIZE; + subir1,r1,SPECIAL_EXC_FRAME_SIZE; \ The caller will already check MSR_PR and override this if coming from userspace; why do you need to check again here? Looks this is redundant so this will be left out. Thanks, Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v5][PATCH 3/6] book3e/kgdb: update thread's dbcr0
On 10/19/2013 06:57 AM, Scott Wood wrote: On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote: gdb always need to generate a single step properly to invoke a kgdb state. But with lazy interrupt, book3e can't always trigger a debug exception with a single step since the current is blocked for handling those pending exception, then we miss that expected dbcr configuration at last to generate a debug exception. What do you mean by the current is blocked? Could you explain more clearly what lazy EE has to do with MSR_DE and DBCR0? I will go another path to make sure the lazy EE doesn't affect KGDB, so please see next version. Thanks, Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v5][PATCH 4/6] powerpc/book3e: support kgdb for kernel space
On 10/19/2013 06:58 AM, Scott Wood wrote: On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote: Currently we need to skip this for supporting KGDB. Does it need to depend on CONFIG_KGDB? Either you've fixed the can't quite save properly part, or you haven't. I'm not 100% sure if my change is fine to other scenarios so I have to use this guarantee other stuff are still safe. But if you think we can remove this dependent, I'm happy to do :) Thanks, Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v5][PATCH 6/6] book3e/kgdb: Fix a single stgep case of lazy IRQ
On 10/19/2013 07:32 AM, Scott Wood wrote: On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote: When we're in kgdb_singlestep(), we have to work around to get thread_info by copying from the kernel stack before calling kgdb_handle_exception(), then copying it back afterwards. But for PPC64, we have a lazy interrupt implementation. So after copying thread info frome kernle stack, if we need to replay an interrupt, we shouldn't restore that previous backup thread info to make sure we can replay an interrupt lately with a proper thread info. Explain why copying it would be a problem. This would be gone away in next version as well :) Thanks, Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v5][PATCH 1/6] powerpc/book3e: load critical/machine/debug exception stack
On 10/19/2013 07:55 AM, Scott Wood wrote: On Thu, 2013-06-20 at 18:28 +0800, Tiejun Chen wrote: We always alloc critical/machine/debug check exceptions. This is different from the normal exception. So we should load these exception stack properly like we did for booke. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/exceptions-64e.S | 49 +++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 4b23119..4d8e57f 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -36,6 +36,37 @@ */ #define SPECIAL_EXC_FRAME_SIZE INT_FRAME_SIZE +/* only on book3e */ +#define DBG_STACK_BASE dbgirq_ctx +#define MC_STACK_BASE mcheckirq_ctx +#define CRIT_STACK_BASEcritirq_ctx + +#ifdef CONFIG_RELOCATABLE +#define LOAD_STACK_BASE(reg, level)\ + tovirt(r2,r2); \ + LOAD_REG_ADDR(reg, level##_STACK_BASE); Where does r2 come from here, where does it get used, and why do we need tovirt() on book3e? As I remember this should be covered when we boot that capture kernel in kexec/kdump case. Now this is also gone away after move forward the c code. Thanks, Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 6/6 v2] kvm: powerpc: use caching attributes as per linux pte
On 08/01/2013 07:12 PM, Bharat Bhushan wrote: KVM uses same WIM tlb attributes as the corresponding qemu pte. For this we now search the linux pte for the requested page and get these cache caching/coherency attributes from pte. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- v1-v2 - Use Linux pte for wimge rather than RAM/no-RAM mechanism arch/powerpc/include/asm/kvm_host.h |2 +- arch/powerpc/kvm/booke.c|2 +- arch/powerpc/kvm/e500.h |8 +--- arch/powerpc/kvm/e500_mmu_host.c| 31 ++- 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 3328353..583d405 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -535,6 +535,7 @@ struct kvm_vcpu_arch { #endif gpa_t paddr_accessed; gva_t vaddr_accessed; + pgd_t *pgdir; u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; @@ -592,7 +593,6 @@ struct kvm_vcpu_arch { struct list_head run_list; struct task_struct *run_task; struct kvm_run *kvm_run; - pgd_t *pgdir; spinlock_t vpa_update_lock; struct kvmppc_vpa vpa; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 17722d8..eb2 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -697,7 +697,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) #endif kvmppc_fix_ee_before_entry(); - + vcpu-arch.pgdir = current-mm-pgd; ret = __kvmppc_vcpu_run(kvm_run, vcpu); /* No need for kvm_guest_exit. It's done in handle_exit. diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 4fd9650..fc4b2f6 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -31,11 +31,13 @@ enum vcpu_ftr { #define E500_TLB_NUM 2 /* entry is mapped somewhere in host TLB */ -#define E500_TLB_VALID (1 0) +#define E500_TLB_VALID (1 31) /* TLB1 entry is mapped by host TLB1, tracked by bitmaps */ -#define E500_TLB_BITMAP(1 1) +#define E500_TLB_BITMAP(1 30) /* TLB1 entry is mapped by host TLB0 */ -#define E500_TLB_TLB0 (1 2) +#define E500_TLB_TLB0 (1 29) +/* Lower 5 bits have WIMGE value */ +#define E500_TLB_WIMGE_MASK(0x1f) struct tlbe_ref { pfn_t pfn; /* valid only for TLB0, except briefly */ diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 1c6a9d7..9b10b0b 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -64,15 +64,6 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) return mas3; } -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) -{ -#ifdef CONFIG_SMP - return (mas2 MAS2_ATTRIB_MASK) | MAS2_M; -#else - return mas2 MAS2_ATTRIB_MASK; -#endif -} - /* * writing shadow tlb entry to host TLB */ @@ -248,10 +239,12 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, struct kvm_book3e_206_tlb_entry *gtlbe, -pfn_t pfn) +pfn_t pfn, int wimg) { ref-pfn = pfn; ref-flags |= E500_TLB_VALID; + /* Use guest supplied MAS2_G and MAS2_E */ + ref-flags |= (gtlbe-mas2 MAS2_ATTRIB_MASK) | wimg; if (tlbe_is_writable(gtlbe)) kvm_set_pfn_dirty(pfn); @@ -312,8 +305,7 @@ static void kvmppc_e500_setup_stlbe( /* Force IPROT=0 for all guest mappings. */ stlbe-mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; - stlbe-mas2 = (gvaddr MAS2_EPN) | - e500_shadow_mas2_attrib(gtlbe-mas2, pr); + stlbe-mas2 = (gvaddr MAS2_EPN) | (ref-flags E500_TLB_WIMGE_MASK); stlbe-mas7_3 = ((u64)pfn PAGE_SHIFT) | e500_shadow_mas3_attrib(gtlbe-mas7_3, pr); @@ -332,6 +324,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, unsigned long hva; int pfnmap = 0; int tsize = BOOK3E_PAGESZ_4K; + pte_t pte; + int wimg = 0; /* * Translate guest physical to true physical, acquiring @@ -437,6 +431,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (likely(!pfnmap)) { unsigned long tsize_pages = 1 (tsize + 10 - PAGE_SHIFT); + pgd_t *pgdir; + pfn = gfn_to_pfn_memslot(slot, gfn); if (is_error_noslot_pfn(pfn)) { printk(KERN_ERR Couldn't get real page for gfn %lx!\n, @@ -447,9 +443,18 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500
Re: [PATCH 5/6 v2] kvm: powerpc: booke: Add linux pte lookup like booke3s
On 08/01/2013 07:12 PM, Bharat Bhushan wrote: KVM need to lookup linux pte for getting TLB attributes (WIMGE). This is similar to how book3s does. This will be used in follow-up patches. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- v1-v2 - This is a new change in this version arch/powerpc/include/asm/kvm_booke.h | 73 ++ 1 files changed, 73 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index d3c1eb3..903624d 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -102,4 +102,77 @@ static inline ulong kvmppc_get_msr(struct kvm_vcpu *vcpu) { return vcpu-arch.shared-msr; } + +/* + * Lock and read a linux PTE. If it's present and writable, atomically + * set dirty and referenced bits and return the PTE, otherwise return 0. + */ +static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing) +{ + pte_t pte; + +#ifdef PTE_ATOMIC_UPDATES + pte_t tmp; +/* wait until _PAGE_BUSY is clear then set it atomically */ +#ifdef CONFIG_PPC64 + __asm__ __volatile__ ( + 1:ldarx %0,0,%3\n + andi. %1,%0,%4\n + bne-1b\n + ori %1,%0,%4\n + stdcx. %1,0,%3\n + bne-1b + : =r (pte), =r (tmp), =m (*p) + : r (p), i (_PAGE_BUSY) + : cc); +#else +__asm__ __volatile__ ( +1: lwarx %0,0,%3\n + andi. %1,%0,%4\n + bne-1b\n + ori %1,%0,%4\n + stwcx. %1,0,%3\n + bne-1b +: =r (pte), =r (tmp), =m (*p) +: r (p), i (_PAGE_BUSY) +: cc); +#endif +#else + pte = pte_val(*p); +#endif + + if (pte_present(pte)) { + pte = pte_mkyoung(pte); + if (writing pte_write(pte)) + pte = pte_mkdirty(pte); + } + + *p = pte; /* clears _PAGE_BUSY */ + + return pte; +} + +static inline pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva, + int writing, unsigned long *pte_sizep) Looks this function is as same as book3s, so why not improve that as common :) Tiejun +{ + pte_t *ptep; + unsigned long ps = *pte_sizep; + unsigned int shift; + + ptep = find_linux_pte_or_hugepte(pgdir, hva, shift); + if (!ptep) + return __pte(0); + if (shift) + *pte_sizep = 1ul shift; + else + *pte_sizep = PAGE_SIZE; + + if (ps *pte_sizep) + return __pte(0); + if (!pte_present(*ptep)) + return __pte(0); + + return kvmppc_read_update_linux_pte(ptep, writing); +} + #endif /* __ASM_KVM_BOOKE_H__ */ ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/6 v2] kvm: powerpc: allow guest control G attribute in mas2
On 08/01/2013 07:12 PM, Bharat Bhushan wrote: G bit in MAS2 indicates whether the page is Guarded. There is no reason to stop guest setting E, so allow him. Could we merge patch 2 and 3 into only one. Tiejun Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- v1-v2 - no change arch/powerpc/kvm/e500.h |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index 277cb18..4fd9650 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -117,7 +117,7 @@ static inline struct kvmppc_vcpu_e500 *to_e500(struct kvm_vcpu *vcpu) #define E500_TLB_USER_PERM_MASK (MAS3_UX|MAS3_UR|MAS3_UW) #define E500_TLB_SUPER_PERM_MASK (MAS3_SX|MAS3_SR|MAS3_SW) #define MAS2_ATTRIB_MASK \ - (MAS2_X0 | MAS2_X1 | MAS2_E) + (MAS2_X0 | MAS2_X1 | MAS2_E | MAS2_G) #define MAS3_ATTRIB_MASK \ (MAS3_U0 | MAS3_U1 | MAS3_U2 | MAS3_U3 \ | E500_TLB_USER_PERM_MASK | E500_TLB_SUPER_PERM_MASK) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: BUG: 32 Bit Kernel kexec hangs on P2020
On 07/10/2013 06:00 PM, Stefani Seibold wrote: Am Mittwoch, den 10.07.2013, 16:48 +0800 schrieb tiejun.chen: On 07/10/2013 04:39 PM, Stefani Seibold wrote: Hi, i have tried to kexec a 32 bit kernel on a Freescale P2020 dual core CPU (e500v2, revison 5.1 - pvr 8021 1051), but Kexec will hang after the Bye!. The host and the kexec kernel are the same, based on the current kernel version 3.10. I have tried it with kexec-tools 2.0.4.git released 30 June 2013. Invoking kexec with kexec --command-line 1 maxcpus=1 noirqdistrib reset_devices $(cat /proc/cmdline) -t elf-ppc --dtb=rs2020.dtb \ --reuse-node=/cpus/PowerPC,P2020@0/timebase-frequency \ --reuse-node=/cpus/PowerPC,P2020@0/bus-frequency \ --reuse-node=/cpus/PowerPC,P2020@0/clock-frequency \ --reuse-node=/cpus/PowerPC,P2020@0/next-level-cache \ --reuse-node=/cpus/PowerPC,P2020@1/timebase-frequency \ --reuse-node=/cpus/PowerPC,P2020@1/bus-frequency \ --reuse-node=/cpus/PowerPC,P2020@1/clock-frequency \ --reuse-node=/cpus/PowerPC,P2020@1/next-level-cache \ --reuse-node=/cpus/PowerPC,P2020@1/cpu-release-addr \ --reuse-node=/cpus/PowerPC,P2020@1/enable-method \ --reuse-node=/soc@ffe0/bus-frequency \ --reuse-node=/soc@ffe0/serial@4500/clock-frequency \ --reuse-node=/soc@ffe0/ethernet@24000/local-mac-address \ -d -l -x vmlinux kexec -e Could you try again with one simple command like, kexec -l vmlinux --append=`cat /proc/cmdline`;kexec -e Great, this works. I did more test and the issue for the fail is the passing of the device tree. But the device tree is exact the same as the previous used one. Is passing a new device tree not allowed? No, I think this is not interdicted. I suspect you probably don't pass a proper dtb, so you can take a further test to figure out this problem. For example, in the u-boot prompt, you can use the fdt command to get last dtb after that board setup from u-boot, then pass that directly to try. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v1][PATCH 1/1] KVM: PPC: disable preemption when using hard_irq_disable()
On 07/11/2013 03:15 AM, Scott Wood wrote: On 07/10/2013 01:02:19 AM, Tiejun Chen wrote: We should ensure the preemption cannot occur while calling get_paca() insdide hard_irq_disable(), otherwise the paca_struct may be the wrong one just after. And btw, we may update timing stats in this case. The soft-ee mechanism depends on accessing the PACA directly via r13 to avoid this. We probably should be using inline asm to read was_enabled rather than Yes. hoping the compiler doesn't do anything silly. Do you recommend I should directly replace get_paca() with local_paca inside hard_irq_disable()? #define hard_irq_disable() do {\ u8 _was_enabled = get_paca()-soft_enabled; \ - u8 _was_enabled = local_paca-soft_enabled; But is this safe for all scenarios? Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v2][PATCH 1/7] powerpc/book3e: support CONFIG_RELOCATABLE
On 07/02/2013 01:00 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Tiejun Chen Sent: Thursday, June 20, 2013 1:23 PM To: b...@kernel.crashing.org Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org Subject: [v2][PATCH 1/7] powerpc/book3e: support CONFIG_RELOCATABLE book3e is different with book3s since 3s includes the exception vectors code in head_64.S as it relies on absolute addressing which is only possible within this compilation unit. So we have to get that label address with got. And when boot a relocated kernel, we should reset ipvr properly again after .relocate. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- [snip] int *src, *dest; unsigned long length; +#ifdef CONFIG_PPC_BOOK3E + extern char interrupt_end_book3e[]; +#endif Cannot we do this in arch/powerpc/kernel/asm/sections.h if (PHYSICAL_START == 0) return; src = (int *)(KERNELBASE + PHYSICAL_START); dest = (int *)KERNELBASE; +#ifdef CONFIG_PPC_BOOK3E + length = (interrupt_end_book3e - _stext) / sizeof(int); +#else length = (__end_interrupts - _stext) / sizeof(int); +#endif can we keep same name in books and booke; __end_interrupts ? this way we can avoid such #ifdefs Yes, I think I can simplify this as you pointed :) Thanks, Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v2][PATCH 2/7] book3e/kexec/kdump: enable kexec for kernel
On 07/02/2013 01:17 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Tiejun Chen Sent: Thursday, June 20, 2013 1:23 PM To: b...@kernel.crashing.org Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org Subject: [v2][PATCH 2/7] book3e/kexec/kdump: enable kexec for kernel We need to active KEXEC for book3e and bypass or convert non-book3e stuff in kexec coverage. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/Kconfig |2 +- arch/powerpc/kernel/machine_kexec_64.c |6 ++ arch/powerpc/kernel/misc_64.S |6 ++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c33e3ad..6ecf3c9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -364,7 +364,7 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE config KEXEC bool kexec system call - depends on (PPC_BOOK3S || FSL_BOOKE || (44x !SMP)) + depends on (PPC_BOOK3S || FSL_BOOKE || (44x !SMP)) || PPC_BOOK3E help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 611acdf..ef39271 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -33,6 +33,7 @@ int default_machine_kexec_prepare(struct kimage *image) { int i; +#ifndef CONFIG_PPC_BOOK3E unsigned long begin, end; /* limits of segment */ unsigned long low, high;/* limits of blocked memory range */ struct device_node *node; @@ -41,6 +42,7 @@ int default_machine_kexec_prepare(struct kimage *image) if (!ppc_md.hpte_clear_all) return -ENOENT; +#endif Do we really need this function for book3e? can we have a separate function rather than multiple confusing ifdef? I prefer we have a separate function to book3e. Thanks Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v2][PATCH 4/7] book3e/kexec/kdump: introduce a kexec kernel flag
On 07/02/2013 01:37 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Tiejun Chen Sent: Thursday, June 20, 2013 1:23 PM To: b...@kernel.crashing.org Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org Subject: [v2][PATCH 4/7] book3e/kexec/kdump: introduce a kexec kernel flag We need to introduce a flag to indicate we're already running a kexec kernel then we can go proper path. For example, We shouldn't access spin_table from the bootloader to up any secondary cpu for kexec kernel, and kexec kernel already know how to jump to generic_secondary_smp_init. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- [snip] +++ b/arch/powerpc/platforms/85xx/smp.c @@ -150,6 +150,9 @@ static int __cpuinit smp_85xx_kick_cpu(int nr) int hw_cpu = get_hard_smp_processor_id(nr); int ioremappable; int ret = 0; +#if defined(CONFIG_KEXEC) || defined(CONFIG_CRASH_DUMP) + unsigned long *ptr; +#endif What about if we can remove the ifdef around *ptr ... WARN_ON(nr 0 || nr = NR_CPUS); WARN_ON(hw_cpu 0 || hw_cpu = NR_CPUS); @@ -238,11 +241,22 @@ out: #else smp_generic_kick_cpu(nr); +#if defined(CONFIG_KEXEC) || defined(CONFIG_CRASH_DUMP) + ptr = (unsigned long *)((unsigned long)__run_at_kexec); ... #endif here ... + /* We shouldn't access spin_table from the bootloader to up any +* secondary cpu for kexec kernel, and kexec kernel already +* know how to jump to generic_secondary_smp_init. +*/ + if (!*ptr) { +#endif ... remove #endif ... flush_spin_table(spin_table); out_be32(spin_table-pir, hw_cpu); out_be64((u64 *)(spin_table-addr_h), __pa((u64)*((unsigned long long *)generic_secondary_smp_init))); flush_spin_table(spin_table); +#if defined(CONFIG_KEXEC) || defined(CONFIG_CRASH_DUMP) + } +#endif --- remove above 3 lines I'd like to try to address your comments next version. Thanks Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v2][PATCH 1/7] powerpc/book3e: support CONFIG_RELOCATABLE
On 07/03/2013 07:52 PM, Sethi Varun-B16395 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+varun.sethi=freescale@lists.ozlabs.org] On Behalf Of Tiejun Chen Sent: Thursday, June 20, 2013 1:23 PM To: b...@kernel.crashing.org Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org Subject: [v2][PATCH 1/7] powerpc/book3e: support CONFIG_RELOCATABLE book3e is different with book3s since 3s includes the exception vectors code in head_64.S as it relies on absolute addressing which is only possible within this compilation unit. So we have to get that label address with got. And when boot a relocated kernel, we should reset ipvr properly again after .relocate. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/exception-64e.h |8 arch/powerpc/kernel/exceptions-64e.S | 15 ++- arch/powerpc/kernel/head_64.S| 22 ++ arch/powerpc/lib/feature-fixups.c|7 +++ 4 files changed, 51 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index 51fa43e..89e940d 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -214,10 +214,18 @@ exc_##label##_book3e: #define TLB_MISS_STATS_SAVE_INFO_BOLTED #endif +#ifndef CONFIG_RELOCATABLE #define SET_IVOR(vector_number, vector_offset)\ li r3,vector_offset@l; \ ori r3,r3,interrupt_base_book3e@l; \ mtspr SPRN_IVOR##vector_number,r3; +#else +#define SET_IVOR(vector_number, vector_offset) \ + LOAD_REG_ADDR(r3,interrupt_base_book3e);\ + rlwinm r3,r3,0,15,0; \ + ori r3,r3,vector_offset@l; \ + mtspr SPRN_IVOR##vector_number,r3; +#endif [Sethi Varun-B16395] Please add a documentation note here. Okay. #endif /* _ASM_POWERPC_EXCEPTION_64E_H */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 645170a..4b23119 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1097,7 +1097,15 @@ skpinv: addir6,r6,1 /* Increment */ * r4 = MAS0 w/TLBSEL ESEL for the temp mapping */ /* Now we branch the new virtual address mapped by this entry */ +#ifdef CONFIG_RELOCATABLE + /* We have to find out address from lr. */ + bl 1f /* Find our address */ +1: mflrr6 + addir6,r6,(2f - 1b) + tovirt(r6,r6) +#else LOAD_REG_IMMEDIATE(r6,2f) +#endif lis r7,MSR_KERNEL@h ori r7,r7,MSR_KERNEL@l mtspr SPRN_SRR0,r6 @@ -1348,9 +1356,14 @@ _GLOBAL(book3e_secondary_thread_init) mflrr28 b 3b -_STATIC(init_core_book3e) +_GLOBAL(init_core_book3e) /* Establish the interrupt vector base */ +#ifdef CONFIG_RELOCATABLE + tovirt(r2,r2) + LOAD_REG_ADDR(r3, interrupt_base_book3e) #else LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e) +#endif mtspr SPRN_IVPR,r3 sync blr [Sethi Varun-B16395] Please add a documentation note here as well. Okay. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index b61363d..0942f3a 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -414,12 +414,22 @@ _STATIC(__after_prom_start) /* process relocations for the final address of the kernel */ lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */ sldir25,r25,32 +#if defined(CONFIG_PPC_BOOK3E) + tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ +#endif lwz r7,__run_at_load-_stext(r26) +#if defined(CONFIG_PPC_BOOK3E) + tophys(r26,r26) /* Restore for the remains. */ +#endif cmplwi cr0,r7,1/* flagged to stay where we are ? */ bne 1f add r25,r25,r26 1:mr r3,r25 bl .relocate +#if defined(CONFIG_PPC_BOOK3E) + /* We should set ivpr again after .relocate. */ + bl .init_core_book3e +#endif #endif [Sethi Varun-B16395] A more detailed note over here would be useful. Okay. /* @@ -447,12 +457,24 @@ _STATIC(__after_prom_start) * variable __run_at_load, if it is set the kernel is treated as relocatable * kernel, otherwise it will be moved to PHYSICAL_START */ +#if defined(CONFIG_PPC_BOOK3E) + tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ +#endif lwz r7,__run_at_load-_stext(r26) +#if defined(CONFIG_PPC_BOOK3E) + tophys(r26,r26) /* Restore for the remains. */ +#endif cmplwi cr0,r7,1 bne 3f +#ifdef CONFIG_PPC_BOOK3E + LOAD_REG_ADDR(r5, interrupt_end_book3e) + LOAD_REG_ADDR(r11, _stext) + sub r5,r5,r11
Re: [v3][PATCH 1/8] powerpc/book3e: rename interrupt_end_book3e with __end_interrupts
On 07/10/2013 01:17 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Tiejun Chen Sent: Tuesday, July 09, 2013 1:33 PM To: b...@kernel.crashing.org Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org Subject: [v3][PATCH 1/8] powerpc/book3e: rename interrupt_end_book3e with __end_interrupts We can rename 'interrupt_end_book3e' with '__end_interrupts' then book3s/book3e can share this unique label to make sure we can use this conveniently. I think we can be consistent with start and end names, no? Are you saying to rename 'interrupt_base_book3e' with '__base_interrupts' here? But seems optional since book3s have no this similar start label, so I'd like to keep that as original now. Lets listen other comments firstly. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 7/8] book3e/kexec/kdump: redefine VIRT_PHYS_OFFSET
On 07/10/2013 01:20 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Tiejun Chen Sent: Tuesday, July 09, 2013 1:33 PM To: b...@kernel.crashing.org Cc: linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org Subject: [v3][PATCH 7/8] book3e/kexec/kdump: redefine VIRT_PHYS_OFFSET Book3e is always aligned 1GB to create TLB so we should use (KERNELBASE - MEMORY_START) as VIRT_PHYS_OFFSET to get __pa/__va properly while boot kdump. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/page.h |2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 988c812..5b00081 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -112,6 +112,8 @@ extern long long virt_phys_offset; /* See Description below for VIRT_PHYS_OFFSET */ #ifdef CONFIG_RELOCATABLE_PPC32 #define VIRT_PHYS_OFFSET virt_phys_offset +#elif defined(CONFIG_PPC_BOOK3E_64) +#define VIRT_PHYS_OFFSET (KERNELBASE - MEMORY_START) Can you please explain this code a bit more. I am not understanding this part:) Nothing is special, we only need to redefine this to make sure __va()/__pa() can work well for BOOk3E-64 in BOOKE case: #ifdef CONFIG_BOOKE #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET)) #define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET) And the arch/powerpc/include/asm/page.h file has more descriptions inline :) Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 4/6 v5] KVM: PPC: exit to user space on ehpriv instruction
On 06/26/2013 01:42 PM, Bharat Bhushan wrote: ehpriv instruction is used for setting software breakpoints by user space. This patch adds support to exit to user space with run-debug have relevant information. As this is the first point we are using run-debug, also defined the run-debug structure. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- arch/powerpc/include/asm/disassemble.h |4 arch/powerpc/include/uapi/asm/kvm.h| 21 + arch/powerpc/kvm/e500_emulate.c| 27 +++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h index 9b198d1..856f8de 100644 --- a/arch/powerpc/include/asm/disassemble.h +++ b/arch/powerpc/include/asm/disassemble.h @@ -77,4 +77,8 @@ static inline unsigned int get_d(u32 inst) return inst 0x; } +static inline unsigned int get_oc(u32 inst) +{ + return (inst 11) 0x7fff; +} #endif /* __ASM_PPC_DISASSEMBLE_H__ */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 0fb1a6e..ded0607 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -269,7 +269,24 @@ struct kvm_fpu { __u64 fpr[32]; }; +/* + * Defines for h/w breakpoint, watchpoint (read, write or both) and + * software breakpoint. + * These are used as type in KVM_SET_GUEST_DEBUG ioctl and status + * for KVM_DEBUG_EXIT. + */ +#define KVMPPC_DEBUG_NONE 0x0 +#define KVMPPC_DEBUG_BREAKPOINT(1UL 1) +#define KVMPPC_DEBUG_WATCH_WRITE (1UL 2) +#define KVMPPC_DEBUG_WATCH_READ(1UL 3) struct kvm_debug_exit_arch { + __u64 address; + /* +* exiting to userspace because of h/w breakpoint, watchpoint +* (read, write or both) and software breakpoint. +*/ + __u32 status; + __u32 reserved; }; /* for KVM_SET_GUEST_DEBUG */ @@ -281,10 +298,6 @@ struct kvm_guest_debug_arch { * Type denotes h/w breakpoint, read watchpoint, write * watchpoint or watchpoint (both read and write). */ -#define KVMPPC_DEBUG_NONE 0x0 -#define KVMPPC_DEBUG_BREAKPOINT(1UL 1) -#define KVMPPC_DEBUG_WATCH_WRITE (1UL 2) -#define KVMPPC_DEBUG_WATCH_READ(1UL 3) __u32 type; __u32 reserved; } bp[16]; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index b10a012..dab9d07 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -26,6 +26,8 @@ #define XOP_TLBRE 946 #define XOP_TLBWE 978 #define XOP_TLBILX 18 +#define XOP_EHPRIV 270 +#define EHPRIV_OC_DEBUG 0 As I think the case, OC = 0, is a bit specific since IIRC, if the OC operand is omitted, its equal 0 by default. So I think we should start this OC value from 1 or other magic number. And if possible, we'd better add some comments to describe this to make the OC definition readable. Tiejun #ifdef CONFIG_KVM_E500MC static int dbell2prio(ulong param) @@ -82,6 +84,26 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb) } #endif +static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + + switch (get_oc(inst)) { + case EHPRIV_OC_DEBUG: + run-exit_reason = KVM_EXIT_DEBUG; + run-debug.arch.address = vcpu-arch.pc; + run-debug.arch.status = 0; + kvmppc_account_exit(vcpu, DEBUG_EXITS); + emulated = EMULATE_EXIT_USER; + *advance = 0; + break; + default: + emulated = EMULATE_FAIL; + } + return emulated; +} + int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -130,6 +152,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); break; + case XOP_EHPRIV: + emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst, + advance); + break; + default: emulated = EMULATE_FAIL; } ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 4/6 v5] KVM: PPC: exit to user space on ehpriv instruction
On 06/26/2013 04:44 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: tiejun.chen [mailto:tiejun.c...@windriver.com] Sent: Wednesday, June 26, 2013 12:25 PM To: Bhushan Bharat-R65777 Cc: kvm-...@vger.kernel.org; k...@vger.kernel.org; ag...@suse.de; Wood Scott- B07421; b...@kernel.crashing.org; linuxppc-dev@lists.ozlabs.org; linux- ker...@vger.kernel.org; mi...@neuling.org; Bhushan Bharat-R65777 Subject: Re: [PATCH 4/6 v5] KVM: PPC: exit to user space on ehpriv instruction On 06/26/2013 01:42 PM, Bharat Bhushan wrote: ehpriv instruction is used for setting software breakpoints by user space. This patch adds support to exit to user space with run-debug have relevant information. As this is the first point we are using run-debug, also defined the run-debug structure. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- arch/powerpc/include/asm/disassemble.h |4 arch/powerpc/include/uapi/asm/kvm.h| 21 + arch/powerpc/kvm/e500_emulate.c| 27 +++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h index 9b198d1..856f8de 100644 --- a/arch/powerpc/include/asm/disassemble.h +++ b/arch/powerpc/include/asm/disassemble.h @@ -77,4 +77,8 @@ static inline unsigned int get_d(u32 inst) return inst 0x; } +static inline unsigned int get_oc(u32 inst) +{ + return (inst 11) 0x7fff; +} #endif /* __ASM_PPC_DISASSEMBLE_H__ */ diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 0fb1a6e..ded0607 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -269,7 +269,24 @@ struct kvm_fpu { __u64 fpr[32]; }; +/* + * Defines for h/w breakpoint, watchpoint (read, write or both) and + * software breakpoint. + * These are used as type in KVM_SET_GUEST_DEBUG ioctl and status + * for KVM_DEBUG_EXIT. + */ +#define KVMPPC_DEBUG_NONE 0x0 +#define KVMPPC_DEBUG_BREAKPOINT(1UL 1) +#define KVMPPC_DEBUG_WATCH_WRITE (1UL 2) +#define KVMPPC_DEBUG_WATCH_READ(1UL 3) struct kvm_debug_exit_arch { + __u64 address; + /* +* exiting to userspace because of h/w breakpoint, watchpoint +* (read, write or both) and software breakpoint. +*/ + __u32 status; + __u32 reserved; }; /* for KVM_SET_GUEST_DEBUG */ @@ -281,10 +298,6 @@ struct kvm_guest_debug_arch { * Type denotes h/w breakpoint, read watchpoint, write * watchpoint or watchpoint (both read and write). */ -#define KVMPPC_DEBUG_NONE 0x0 -#define KVMPPC_DEBUG_BREAKPOINT(1UL 1) -#define KVMPPC_DEBUG_WATCH_WRITE (1UL 2) -#define KVMPPC_DEBUG_WATCH_READ(1UL 3) __u32 type; __u32 reserved; } bp[16]; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index b10a012..dab9d07 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -26,6 +26,8 @@ #define XOP_TLBRE 946 #define XOP_TLBWE 978 #define XOP_TLBILX 18 +#define XOP_EHPRIV 270 +#define EHPRIV_OC_DEBUG 0 As I think the case, OC = 0, is a bit specific since IIRC, if the OC operand is omitted, its equal 0 by default. So I think we should start this OC value from 1 or other magic number. ehpriv instruction is defined to be used as: ehpriv OC // where OC can be 0,1, ... n and in extended for it can be used as ehpriv // With no OC, and here it assumes OC = 0 So OC = 0 is not specific but ehpriv is same as ehpriv 0. Yes, this is just what I mean. I do not think of any special reason to reserve ehpriv and ehpriv 0. So I still prefer we can reserve the 'ehpriv' without OC operand as one simple approach to test or develop something for KVM quickly because its really convenient to trap into the hypervisor only with one 'ehpriv' instruction easily. But I have no further objection if you guys are fine to this ;-) Tiejun Thanks -Bharat And if possible, we'd better add some comments to describe this to make the OC definition readable. Tiejun #ifdef CONFIG_KVM_E500MC static int dbell2prio(ulong param) @@ -82,6 +84,26 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb) } #endif +static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst, int *advance) +{ + int emulated = EMULATE_DONE; + + switch (get_oc(inst)) { + case EHPRIV_OC_DEBUG: + run-exit_reason = KVM_EXIT_DEBUG; + run-debug.arch.address = vcpu-arch.pc; + run-debug.arch.status = 0; + kvmppc_account_exit(vcpu, DEBUG_EXITS); + emulated
Re: fsqrt
On 06/07/2013 03:41 PM, Benjamin Herrenschmidt wrote: Another question... Do you guys happen to have a patch to emulate fsqrt in the kernel ? Seems this is already emulated: arch/powerpc/math-emu/fsqrt.c You can enable CONFIG_MATH_EMULATION to try. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: fsqrt
On 06/07/2013 04:53 PM, Benjamin Herrenschmidt wrote: On Fri, 2013-06-07 at 15:46 +0800, tiejun.chen wrote: On 06/07/2013 03:41 PM, Benjamin Herrenschmidt wrote: Another question... Do you guys happen to have a patch to emulate fsqrt in the kernel ? Seems this is already emulated: arch/powerpc/math-emu/fsqrt.c You can enable CONFIG_MATH_EMULATION to try. Is math emu expected to work at all on top of a real FPU ? I though it didn't ... maybe I'm wrong. As I understand often the real FPU can't support all float instructions, so we have to enable this to emulate those unsupported float instructions in that scenario. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: can't access PCIe card under sbc8548
On 05/30/2013 03:19 PM, wolfking wrote: hi, tiejun.chen: Thanks for replying. I tried to use ioremap too, but it doesn't work. The ioport_map method What happened? Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: can't access PCIe card under sbc8548
On 05/30/2013 05:15 PM, wolfking wrote: hi, tiejun.chen: When I use ioremap, the card seems to work fine. That is, I can access Yes, ioremap() should work for MMIO. part of all register. My PCIe card is a rs232 expand card, it has some standard UART register, for example the SCR(scratch register). My driver can access the SCR(write and read) normally, but the other registers behave odd. For example, the DLM should be 0, but it reads 5. The card has a software reset bit, when it is set to 1, the card reset itself. When it finished reset, this reset bit should be back to 0. But In sbc8548, when I set this bit, it remains high. So I guess, the area I accessed is not the PCIe card, I suspect you're missing some load/storage sync code, so what is your R/W function exactly? Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: can't access PCIe card under sbc8548
On 05/30/2013 06:02 PM, wolfking wrote: I tried several R/W functions: inb/outb and ioread8/iowrite8. The inb/outb What about out_be32/in_be32? Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: can't access PCIe card under sbc8548
On 05/30/2013 03:32 PM, wolfking wrote: (continued) I traced the 8139too.c when it uses pci_iomap, the pci_iomap called the ioport_map. The difference between 8139 and my PCIe card lies in the port value : void __iomem *ioport_map(unsigned long port, unsigned int len) { return (void __iomem *) (port + _IO_BASE); _IO_BASE is equal to isa_io_base. So if this is not zero, I think there's a isa bridge in your platform. So you can access these I/O ports based on that isa bridge/bus with ioreadx/iowritex. } in 8139too.c, the port value is 0x1000; for my PCIe card, the port value is 0xfefff000. And the value is got from pci_resource_start. So you see, the But this means the port is as memory-mapped so ioremap() should be workable in this case. Then out_bex/in_bex should be fine. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: can't access PCIe card under sbc8548
On 05/31/2013 12:24 AM, Scott Wood wrote: On 05/30/2013 07:49:48 AM, wolfking wrote: tiejun.chen wrote On 05/30/2013 03:32 PM, wolfking wrote: (continued) I traced the 8139too.c when it uses pci_iomap, the pci_iomap called the ioport_map. The difference between 8139 and my PCIe card lies in the port value : void __iomem *ioport_map(unsigned long port, unsigned int len) { return (void __iomem *) (port + _IO_BASE); _IO_BASE is equal to isa_io_base. So if this is not zero, I think there's a isa bridge in your platform. So you can access these I/O ports based on that isa bridge/bus with ioreadx/iowritex. I tried ioread8/iowriet8 after ioremap, it doesn't work } in 8139too.c, the port value is 0x1000; for my PCIe card, the port value is 0xfefff000. And the value is got from pci_resource_start. So you see, the But this means the port is as memory-mapped Are you sure? It could mean that it's on a non-primary bus and I/O for this bus We can take a further look at '/proc/ioports', '/proc/iomem' and other message. is mapped at a lower address than the primary. Just because the addition is wrapping around doesn't mean it's wrong. so ioremap() should be workable in this case. Then out_bex/in_bex should be fine. ioremap() and out_bex/in_bex are not appropriate for PCI I/O regions (and presumably that's what it is, if pci_iomap is calling ioport_map). Big-endian is not appropriate for PCI in any case. Yes, I should correct that PCI interprets all accesses as little-endian. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: can't access PCIe card under sbc8548
On 05/30/2013 11:42 AM, wolfking wrote: hi, all I'm doing some developing on the windriver's sbc8548 board. The kernel I use is 3.6.10 and the u-boot version is 2012-10. I changed the board's configuration: the board now boot from the 64MB SODIMM Flash (not the default 8MB on-board Flash memory), and the PCI clock rate is changed to 33MHZ. Now the trouble I am in is that: the PCI card (a NIC card rtl8139) can be accessed OK, while the PCIe card can't work, that is, the kernel can't access its internal register. The kernel can correctly probe the PCIe card. its BAR0 is a I/O mapped register, I use ioport_map to map the BAR0 to kernel's address space, then use ioread8/iowrite8 to access its internal register, it doesn't work. I analyse the ioport_map function and find it just add the input parameter to a fixed _IO_BASE value, below is the function: void __iomem *ioport_map(unsigned long port, unsigned int len) { return (void __iomem *) (port + _IO_BASE); } the _IO_BASE value under sbc8548 is 0xfd7fd000, the value of ioport_map paramenter port is 0xfefff000. Obviously the add overflows, so the follow-up operations can't succeed. The value of port is got from the function pci_resource_start. In PPC case I/O is memory-mapped, so you should use ioremap() instead of ioport_map().. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/32bit, PREEMPT:Load TI_FLAGS to check NEED_RESCHED
On 05/27/2013 02:27 PM, Priyanka Jain wrote: Add instruction to load TI_FLAGS in r8 While returning from exception handling in case of PREEMPT enabled, _TIF_NEED_RESCHED bit is checked in TI_FLAGS (thread_info flag) of current task. Only if this bit is set, it should continue with the process of calling preempt_schedule_irq() to schedule highest priority task if available. Current code assumes that r8 contains TI_FLAGS and check this for _TIF_NEED_RESCHED, but as r8 is modified in the code which executes before Could you elaborate this scenario? As I take a look at this path: ... /* Clear _TIF_EMULATE_STACK_STORE flag */ lis r11,_TIF_EMULATE_STACK_STORE@h addir5,r9,TI_FLAGS 0: lwarx r8,0,r5 andcr8,r8,r11 #ifdef CONFIG_IBM405_ERR77 dcbt0,r5 #endif stwcx. r8,0,r5 bne-0b 1: #ifdef CONFIG_PREEMPT /* check current_thread_info-preempt_count */ lwz r0,TI_PREEMPT(r9) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore andi. r8,r8,_TIF_NEED_RESCHED ... Where is R8 clobbered? Tiejun this check, r8 no longer contains the expected TI_FLAGS information. As a result check for comparison with _TIF_NEED_RESCHED was failing even if NEED_RESCHED bit is set in the current thread_info flag. Due to this, preempt_schedule_irq() and in turn scheduler was not getting called even if highest priority task is ready for execution. Signed-off-by: Priyanka Jain priyanka.j...@freescale.com --- arch/powerpc/kernel/entry_32.S |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d22e73e..0239c7f 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -887,6 +887,7 @@ resume_kernel: #ifdef CONFIG_PREEMPT /* check current_thread_info-preempt_count */ lwz r0,TI_PREEMPT(r9) + lwz r8,TI_FLAGS(r9) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore andi. r8,r8,_TIF_NEED_RESCHED ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/32bit, PREEMPT:Load TI_FLAGS to check NEED_RESCHED
On 05/27/2013 02:55 PM, Jain Priyanka-B32167 wrote: If we go some more lines up in the same file, the code is resume_kernel: /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ CURRENT_THREAD_INFO(r9, r1) lwz r8,TI_FLAGS(r9) andis. r8,r8,_TIF_EMULATE_STACK_STORE@h Okay, but could you fix this directly like: diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index e514de5..4498467 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -851,7 +851,7 @@ resume_kernel: /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ CURRENT_THREAD_INFO(r9, r1) lwz r8,TI_FLAGS(r9) - andis. r8,r8,_TIF_EMULATE_STACK_STORE@h + andis. r0,r8,_TIF_EMULATE_STACK_STORE@h beq+1f addir8,r1,INT_FRAME_SIZE/* Get the kprobed function entry */ Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v4 06/12] ARM: dove: add gigabit ethernet and mvmdio device tree nodes
On 05/22/2013 05:43 PM, Sebastian Hesselbarth wrote: On 05/21/2013 07:48 PM, Andrew Lunn wrote: On Tue, May 21, 2013 at 06:41:44PM +0200, Sebastian Hesselbarth wrote: This patch adds orion-eth and mvmdio device tree nodes for DT enabled Dove boards. As there is only one ethernet controller on Dove, a default phy node is also added with a note to set its reg property on a per-board basis. Signed-off-by: Sebastian Hesselbarthsebastian.hesselba...@gmail.com --- ... +ethernet-port@0 { +device_type = network; +compatible = marvell,orion-eth-port; +reg =0; +interrupts =29; +/* overwrite MAC address in bootloader */ +local-mac-address = [00 00 00 00 00 00]; Hi Sebastian Its probably a good idea to set the local administration bit in this MAC address. i.e. first byte is 02. Andrew, we just need an invalid address here to trigger the default behavior of the driver and load the MAC address from its register. As PPC binding documentation also has all zero, I just took it. The truth is in PPC case, often we set the real mac address with some variables like 'eth[x]addr' in u-boot prompt, then u-boot will parse that value to fill the dtb. At last the associated driver can get the actual mac address from the dtb. And especially for those older u-boot version, even you have to reset the 'local-mac-address' property in dts directly with the real mac address before generate the dtb since the older u-boot have no this ability to fill dtb again before pass the kernel. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: SATA FSL and upstreaming
On 05/16/2013 02:09 PM, Benjamin Herrenschmidt wrote: On Thu, 2013-05-16 at 06:05 +, Zang Roy-R61911 wrote: I do not suggest changing the RCW. If the RCW is broken on Ben's side, it is not easy to recover for him. Let's check the U-boot output first. U-Boot 2013.01-9-g7bcd7f4 (Mar 14 2013 - 14:23:16) CPU0: P5020E, Version: 1.0, (0x82280010) Core: E5500, Version: 1.0, (0x80240010) Clock Configuration: CPU0:2000 MHz, CPU1:2000 MHz, CCB:800 MHz, DDR:666.667 MHz (1333.333 MT/s data rate) (Asynchronous), LBC:100 MHz FMAN1: 600 MHz QMAN: 400 MHz PME: 400 MHz L1:D-cache 32 kB enabled I-cache 32 kB enabled Board: P5020DS, Sys ID: 0x1c, Sys Ver: 0x12, FPGA Ver: 0x05, vBank: 0 Reset Configuration Word (RCW): : 0c54 1e12 0010: d8984a01 03002000 de80 4100 0020: 1007 0030: I think you can use Bharat's RCW, which seems RR_HXAPNSP_0x36, then please take a look at this: The RCW directories names for the p5020ds board conform to the following naming convention: ab_bcdefghi_j: a = 'R' if RGMII@DTSEC4 is supported / 'N' if not available/not used b = 'R' if RGMII@DTSEC5 is supported / 'N if not available/not used c = What is available in Slot 1 or SATA d = What is available in Slot 2 e = What is available in Slot 3 f = What is available in Slot 4 g = What is available in Slot 5 h = What is available in Slot 6 i = What is available in Slot 7 For the Slots (c..i): 'N' if not available/not used 'P' if PCIe 'X' if XAUI 'R' if SRIO 'S' if SGMII 'H' if SATA 'A' is AURORA j = 'hex value of serdes protocol value' So NR_HXAPNSP_0x36 means: - no RGMII@DTSEC4 - RGMII@DTSEC5 - SATA [Slot 1 not used] - XAUI on Slot 2 - AURORA on Slot 3 - PCIE on Slot 4 - SGMII on Slot 6 - PCIE on Slot 7 Slot 5 is not used, and the SERDES Protocol is 0x36. So slot 7 and slot 4 can be as PCIe slot. Tiejun SERDES Reference Clocks: Bank1=100Mhz Bank2=125Mhz Bank3=125Mhz I2C: ready SPI: ready DRAM: Initializingusing SPD Detected UDIMM i-DIMM Detected UDIMM i-DIMM 2 GiB left unmapped 4 GiB (DDR3, 64-bit, CL=9, ECC on) DDR Controller Interleaving Mode: cache line DDR Chip-Select Interleaving Mode: CS0+CS1 Testing 0x - 0x7fff Testing 0x8000 - 0x Remap DDR 2 GiB left unmapped POST memory PASSED Flash: 128 MiB L2:512 KB enabled Corenet Platform Cache: 2048 KB enabled SRIO1: disabled SRIO2: disabled NAND: 1024 MiB MMC: FSL_SDHC: 0 EEPROM: NXID v1 PCIe1: Root Complex, no link, regs @ 0xfe20 PCIe1: Bus 00 - 00 PCIe2: disabled PCIe3: Root Complex, no link, regs @ 0xfe202000 PCIe3: Bus 01 - 01 PCIe4: disabled In:serial Out: serial Err: serial Net: Initializing Fman Fman1: Uploading microcode version 106.1.7 PHY reset timed out PHY reset timed out PHY reset timed out PHY reset timed out FM1@DTSEC1, FM1@DTSEC2, FM1@DTSEC3, FM1@DTSEC4, FM1@DTSEC5, FM1@TGEC1 Hit any key to stop autoboot: 0 = Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: SATA FSL and upstreaming
On 05/16/2013 02:20 PM, Zang Roy-R61911 wrote: -Original Message- From: tiejun.chen [mailto:tiejun.c...@windriver.com] Sent: Thursday, May 16, 2013 2:18 PM To: Benjamin Herrenschmidt Cc: Zang Roy-R61911; Liu Qiang-B32616; Fleming Andy-AFLEMING; linuxppc- d...@lists.ozlabs.org; Xie Shaohui-B21989; Bhushan Bharat-R65777 Subject: Re: SATA FSL and upstreaming On 05/16/2013 02:09 PM, Benjamin Herrenschmidt wrote: On Thu, 2013-05-16 at 06:05 +, Zang Roy-R61911 wrote: I do not suggest changing the RCW. If the RCW is broken on Ben's side, it is not easy to recover for him. Let's check the U-boot output first. U-Boot 2013.01-9-g7bcd7f4 (Mar 14 2013 - 14:23:16) CPU0: P5020E, Version: 1.0, (0x82280010) Core: E5500, Version: 1.0, (0x80240010) Clock Configuration: CPU0:2000 MHz, CPU1:2000 MHz, CCB:800 MHz, DDR:666.667 MHz (1333.333 MT/s data rate) (Asynchronous), LBC:100 MHz FMAN1: 600 MHz QMAN: 400 MHz PME: 400 MHz L1:D-cache 32 kB enabled I-cache 32 kB enabled Board: P5020DS, Sys ID: 0x1c, Sys Ver: 0x12, FPGA Ver: 0x05, vBank: 0 Reset Configuration Word (RCW): : 0c54 1e12 0010: d8984a01 03002000 de80 4100 0020: 1007 0030: I think you can use Bharat's RCW, which seems RR_HXAPNSP_0x36, then please take a look at this: Why? I just believe Bharat should pick a proper RCW from SDK. Ben's on board RCW protocol is 0x36, which should work for PCIe1 (slot 7) and PCIe3 (slot4). Didn't you see I'm also saying to use slot 7 and slot 4? Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: SATA FSL and upstreaming
On 05/16/2013 02:21 PM, Benjamin Herrenschmidt wrote: On Thu, 2013-05-16 at 14:17 +0800, tiejun.chen wrote: I think you can use Bharat's RCW, which seems RR_HXAPNSP_0x36, then please take a look at this: Ok, how do I update my RCW to bse Bharat's ? Firstly please check which flash bank is used since we have to know where should be updated RCW. What is SW7[1:4]? Or we have another simple way in u-boot prompt: = md.b ffdf002c ffdf002c: 4f 00 fe 00 39 00 00 00 00 00 00 00 00 00 00 00O...9... ... This means we're on bank4. Any DIP switch setting I need to be aware of ? No. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: SATA FSL and upstreaming
On 05/16/2013 02:37 PM, Benjamin Herrenschmidt wrote: On Thu, 2013-05-16 at 06:33 +, Bhushan Bharat-R65777 wrote: protect off 0xec00 +$filesize; erase 0xec00 +$filesize; cp.b 0x100 0xec00 $filesize BTW, is it normal that the network in uboot is *extremely* unreliable ? We can use serial port: loadb - load binary file over serial line (kermit mode) loads - load S-Record file over serial line loady - load binary file over serial line (ymodem mode) Then please send the RCW with appropriate mode above in your terminal client. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: SATA FSL and upstreaming
On 05/16/2013 02:40 PM, Benjamin Herrenschmidt wrote: On Thu, 2013-05-16 at 14:35 +0800, tiejun.chen wrote: On 05/16/2013 02:21 PM, Benjamin Herrenschmidt wrote: On Thu, 2013-05-16 at 14:17 +0800, tiejun.chen wrote: I think you can use Bharat's RCW, which seems RR_HXAPNSP_0x36, then please take a look at this: Ok, how do I update my RCW to bse Bharat's ? Firstly please check which flash bank is used since we have to know where should be updated RCW. What is SW7[1:4]? Or we have another simple way in u-boot prompt: = md.b ffdf002c ffdf002c: 4f 00 fe 00 39 00 00 00 00 00 00 00 00 00 00 00O...9... ... ffdf002c: 0f 00 fe 00 00 00 00 00 00 00 00 00 00 00 00 00 This means we're on bank4. I assume that means bank0 ? Yes, RCW should be burned to 0xec00. In u-boot prompt: = loady ## Ready for binary (ymodem) download to 0x0100 at 115200 bps... C Then send that RCW with ymodem in your terminal client. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: SATA FSL and upstreaming
On 05/16/2013 02:53 PM, Benjamin Herrenschmidt wrote: On Thu, 2013-05-16 at 06:49 +, Zang Roy-R61911 wrote: Please also provide a RCW binary to Ben, if your guys insist updating the RCW. right, I just noticed it's ascii :-) That isn't going to work well... Ben, I already send my workable combination of u-boot and RCW privately to you, please take a try. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: SATA FSL and upstreaming
On 05/16/2013 01:45 PM, Benjamin Herrenschmidt wrote: On Thu, 2013-05-16 at 14:47 +1000, Benjamin Herrenschmidt wrote: Hi folks ! So I was trying to use my 5020ds to test some stuff today. Since I hadn't used it in a while, I decided to upgrade it to the latest NOR etc... On another note, I can't seem to get any PCIe card recognized in any slot... This should depend on the RCW. As I recall, slot 7 or slot 4 can be configured to support PCIe for P5020DS. And you can know this information according to RCW's README. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest
On 05/11/2013 03:39 AM, Alexander Graf wrote: Am 10.05.2013 um 21:22 schrieb Scott Wood scottw...@freescale.com: On 05/10/2013 12:57:33 PM, Alexander Graf wrote: Could you guys please collect performance data during the next weeks on both guest-directed ISIs as well as VF MMIOs (preferably with in-kernel MMIO), so that we can decide on the direction that's worth going towards? Collecting data on VF MMIO would require implementing it (or at least salvaging and fixing some old code), which is not a high priority at the moment. If we do implement VF in the future we could always undo the direct ISI change, but it would still be nice to know if there's any real benefit in the first place. Mike sounded like he had an almost working poc, which is good enough to collect rough numbers. Which can the test case be adopted? Mike, If you already have a good case for your poc, please share that with me. Then I'd like to run that. Tiejun And yes, changes like these should always get at least basic performance numbers along with them, regardless of drawbacks. Alex FWIW, I doubt that the more stress on HW TLB will be significant. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts
On 05/09/2013 03:33 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Caraman Mihai Claudiu-B02008 Sent: Wednesday, May 08, 2013 6:44 PM To: Wood Scott-B07421; tiejun.chen Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts This only disable soft interrupt for kvmppc_restart_interrupt() that restarts interrupts if they were meant for the host: a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL Those aren't the only exceptions that can end up going to the host. We could get a TLB miss that results in a heavyweight MMIO exit, etc. And shouldn't we handle kvmppc_restart_interrupt() like the original HOST flow? #define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \ ... Could you elaborate on what you mean? I think Tiejun was saying that host has flags and replays only EE/DEC/DBELL interrupts. There is special macro masked_interrupt_book3e in those exception handlers that sets paca-irq_happened. The list of replied interrupts is limited to asynchronous noncritical interrupts which can be masked by MSR[EE] (therefore no TLB miss). Embedded Perfmon interrupt is also asynchronous, Why that is not in the list of masked interruts. Are you saying perfmon? If so, its also in that list: START_EXCEPTION(perfmon); NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE) Tiejun -Bharat Now on KVM book3e we don't want to put them in the irq_happened lazy state but rather to execute them directly, so there is no reason for exception handling symmetry between host and guest. -Mike ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts
On 05/09/2013 03:51 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: tiejun.chen [mailto:tiejun.c...@windriver.com] Sent: Thursday, May 09, 2013 1:18 PM To: Bhushan Bharat-R65777 Cc: Caraman Mihai Claudiu-B02008; Wood Scott-B07421; linuxppc- d...@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts On 05/09/2013 03:33 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of bounces+Caraman Mihai Claudiu-B02008 Sent: Wednesday, May 08, 2013 6:44 PM To: Wood Scott-B07421; tiejun.chen Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts This only disable soft interrupt for kvmppc_restart_interrupt() that restarts interrupts if they were meant for the host: a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL Those aren't the only exceptions that can end up going to the host. We could get a TLB miss that results in a heavyweight MMIO exit, etc. And shouldn't we handle kvmppc_restart_interrupt() like the original HOST flow? #define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \ ... Could you elaborate on what you mean? I think Tiejun was saying that host has flags and replays only EE/DEC/DBELL interrupts. There is special macro masked_interrupt_book3e in those exception handlers that sets paca- irq_happened. The list of replied interrupts is limited to asynchronous noncritical interrupts which can be masked by MSR[EE] (therefore no TLB miss). Embedded Perfmon interrupt is also asynchronous, Why that is not in the list of masked interruts. Are you saying perfmon? If so, its also in that list: START_EXCEPTION(perfmon); NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE) Where it is recorded in paca-irq_happned to be replayed later ? In stead of PROLOG_ADDITION_MASKABLE, actually we have nothing to do with PROLOG_ADDITION_NONE for perfmon, so perfmon can be executed without lazy state. Tiejun Tiejun -Bharat Now on KVM book3e we don't want to put them in the irq_happened lazy state but rather to execute them directly, so there is no reason for exception handling symmetry between host and guest. -Mike ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts
On 05/09/2013 04:12 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Kevin Hao [mailto:haoke...@gmail.com] Sent: Thursday, May 09, 2013 1:38 PM To: Bhushan Bharat-R65777 Cc: tiejun.chen; Caraman Mihai Claudiu-B02008; k...@vger.kernel.org; Wood Scott- B07421; ag...@suse.de; kvm-...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts On Thu, May 09, 2013 at 07:51:09AM +, Bhushan Bharat-R65777 wrote: -Original Message- From: tiejun.chen [mailto:tiejun.c...@windriver.com] Sent: Thursday, May 09, 2013 1:18 PM To: Bhushan Bharat-R65777 Cc: Caraman Mihai Claudiu-B02008; Wood Scott-B07421; linuxppc- d...@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts On 05/09/2013 03:33 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf bounces+Of Caraman Mihai Claudiu-B02008 Sent: Wednesday, May 08, 2013 6:44 PM To: Wood Scott-B07421; tiejun.chen Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts This only disable soft interrupt for kvmppc_restart_interrupt() that restarts interrupts if they were meant for the host: a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL Those aren't the only exceptions that can end up going to the host. We could get a TLB miss that results in a heavyweight MMIO exit, etc. And shouldn't we handle kvmppc_restart_interrupt() like the original HOST flow? #define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \ ... Could you elaborate on what you mean? I think Tiejun was saying that host has flags and replays only EE/DEC/DBELL interrupts. There is special macro masked_interrupt_book3e in those exception handlers that sets paca- irq_happened. The list of replied interrupts is limited to asynchronous noncritical interrupts which can be masked by MSR[EE] (therefore no TLB miss). Embedded Perfmon interrupt is also asynchronous, Why that is not in the list of masked interruts. Are you saying perfmon? If so, its also in that list: START_EXCEPTION(perfmon); NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR, PROLOG_ADDITION_NONE) EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE) Where it is recorded in paca-irq_happned to be replayed later ? Actually we don't want replay the perfmon interrupt later. We would run it even soft irq is disabled and just treat it as NMI. Please see the following function quoted from arch/powerpc/perf/core-fsl-emb.c: /* * If interrupts were soft-disabled when a PMU interrupt occurs, treat * it as an NMI. */ static inline int perf_intr_is_nmi(struct pt_regs *regs) { #ifdef __powerpc64__ return !regs-softe; #else return 0; #endif } Is it because that we cannot afford to lose perfmon interrupt for more accurate capturing of data ? powerpc/perf: e500 support This implements perf_event support for the Freescale embedded performance monitor, based on the existing perf_event.c that supports server/classic chips. Some limitations: - Performance monitor interrupts are regular EE interrupts, and thus you can't profile places with interrupts disabled. We may want to implement soft IRQ-disabling, with perfmon interrupts exempted and treated as NMIs. Tiejun -Bharat Thanks, Kevin Tiejun -Bharat Now on KVM book3e we don't want to put them in the irq_happened lazy state but rather to execute them directly, so there is no reason for exception handling symmetry between host and guest. -Mike ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts
On 05/09/2013 04:23 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of Caraman Mihai Claudiu-B02008 Sent: Wednesday, May 08, 2013 6:44 PM To: Wood Scott-B07421; tiejun.chen Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts This only disable soft interrupt for kvmppc_restart_interrupt() that restarts interrupts if they were meant for the host: a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL Those aren't the only exceptions that can end up going to the host. We could get a TLB miss that results in a heavyweight MMIO exit, etc. And shouldn't we handle kvmppc_restart_interrupt() like the original HOST flow? #define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \ ... Could you elaborate on what you mean? I think Tiejun was saying that host has flags and replays only EE/DEC/DBELL interrupts. There is special macro masked_interrupt_book3e in those exception handlers that sets paca-irq_happened. The list of replied interrupts is limited to asynchronous noncritical interrupts which can be masked by MSR[EE] (therefore no TLB miss). Now on KVM book3e we don't want to put them in the irq_happened lazy state but rather to execute them directly, so there is no reason for exception handling symmetry between host and guest. Another Question: The case is: Actually in the case GS=1 even if EE=0, EXT/DEC/DBELL still occur as I recall. Case 1) - Local_irq_disable() will set soft_enabled = 0 - Now Externel interrupt happens, there we set PACA_IRQ_EE in irq_happened, Also clears EE in SRR1 and rfi. So interrupts are hard disabled. No more other interrupt gated by MSR.EE can happen. Looks like the idea here is to not let a device keep on inserting interrupt till the interrupt condition on device is cleared, right? I don't understand the interrupt condition on device is cleared here. I think regardless if you clear the device interrupt status, the system still receive a pending interrupt once EE or GS = 1. - local_irq_enable() - This checks that irq_happened is set, and replays ret_from_except also check to replay. Now the case 2) Case 2) - Local_irq_disable() will set soft_enabled = 0 - Now DEC interrupt happens. We set PACA_IRQ_DEC in irq_happened, But do not clear EE in SRR1 and rfi. So interrupts are not hard disabled. - Now say EE interrupt happens, there we set PACA_IRQ_EE in irq_happened, Also clears EE in SRR1 and rfi. So interrupts are hard disabled. - local_irq_enable() - This checks that irq_happened is set. IIUC, it replays only one interrupt? is not it? After anyone is replayed in arch_local_irq_restore(), we will set soft/hard interrupt there: set_soft_enabled(1); __hard_irq_enable(); Then any pending interrupt can be executed now. Additionally, ret_from_except probably check to replay all. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts
On 05/09/2013 06:00 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: tiejun.chen [mailto:tiejun.c...@windriver.com] Sent: Thursday, May 09, 2013 3:15 PM To: Bhushan Bharat-R65777 Cc: Caraman Mihai Claudiu-B02008; Wood Scott-B07421; linuxppc- d...@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts On 05/09/2013 04:23 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of bounces+Caraman Mihai Claudiu-B02008 Sent: Wednesday, May 08, 2013 6:44 PM To: Wood Scott-B07421; tiejun.chen Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts This only disable soft interrupt for kvmppc_restart_interrupt() that restarts interrupts if they were meant for the host: a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL Those aren't the only exceptions that can end up going to the host. We could get a TLB miss that results in a heavyweight MMIO exit, etc. And shouldn't we handle kvmppc_restart_interrupt() like the original HOST flow? #define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \ ... Could you elaborate on what you mean? I think Tiejun was saying that host has flags and replays only EE/DEC/DBELL interrupts. There is special macro masked_interrupt_book3e in those exception handlers that sets paca- irq_happened. The list of replied interrupts is limited to asynchronous noncritical interrupts which can be masked by MSR[EE] (therefore no TLB miss). Now on KVM book3e we don't want to put them in the irq_happened lazy state but rather to execute them directly, so there is no reason for exception handling symmetry between host and guest. Another Question: The case is: Actually in the case GS=1 even if EE=0, EXT/DEC/DBELL still occur as I recall. Case 1) - Local_irq_disable() will set soft_enabled = 0 - Now Externel interrupt happens, there we set PACA_IRQ_EE in irq_happened, Also clears EE in SRR1 and rfi. So interrupts are hard disabled. No more other interrupt gated by MSR.EE can happen. Looks like the idea here is to not let a device keep on inserting interrupt till the interrupt condition on device is cleared, right? I don't understand the interrupt condition on device is cleared here. I think regardless if you clear the device interrupt status, the system still receive a pending interrupt once EE or GS = 1. Once yes, but I think to avoid flood of device interrupt we disable MSR.EE when soft-disabled. But we neither ACK nor send EOI to that irq in the interrupt controller, so that should be in pending state. - local_irq_enable() - This checks that irq_happened is set, and replays ret_from_except also check to replay. Now the case 2) Case 2) - Local_irq_disable() will set soft_enabled = 0 - Now DEC interrupt happens. We set PACA_IRQ_DEC in irq_happened, But do not clear EE in SRR1 and rfi. So interrupts are not hard disabled. - Now say EE interrupt happens, there we set PACA_IRQ_EE in irq_happened, Also clears EE in SRR1 and rfi. So interrupts are hard disabled. - local_irq_enable() - This checks that irq_happened is set. IIUC, it replays only one interrupt? is not it? After anyone is replayed in arch_local_irq_restore(), we will set soft/hard interrupt there: set_soft_enabled(1); __hard_irq_enable(); Then any pending interrupt can be executed now. Do you mean that the interrupt should fire again? I means the pending exception including external interrupt, the decrementer exception and the doorbell exception, can trap CPU once EE=1 with __hard_irq_enable() here. Then the kernel can handle those exception since soft enable is also 1 now. Additionally, ret_from_except probably check to replay all. Local_irq_enable() will not take us to ret_from_except. Yes. I just say ret_from_except can provide an approach to replay all :) Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest
On 05/08/2013 05:28 PM, tiejun.chen wrote: On 05/08/2013 05:20 PM, Caraman Mihai Claudiu-B02008 wrote: -Original Message- From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf Of tiejun.chen Sent: Wednesday, May 08, 2013 4:54 AM To: Wood Scott-B07421 Cc: ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org Subject: Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest On 05/08/2013 07:40 AM, Scott Wood wrote: On 05/07/2013 06:06:30 AM, Tiejun Chen wrote: We also can direct ISI exception to Guest like DSI. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kvm/booke_emulate.c |3 +++ arch/powerpc/kvm/e500mc.c|3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) Are you seeing a real performance improvement from this? This will interfere No. But after we reduce the exit to host, shouldn't this improve performance? We lose some flexibility for this so it make sense only if we gain measurable improvements. Sounds we have much more works to do. somewhat with using the VF bit, if we were to ever do so, since VF only affects Sorry, what is the VF you said? VF stands for virtualization fault see MAS8[VF] and we may use it for virtualized I almost forget this point :) Looks KVM PPC have no this mechanism currently since I don't find MAS8_VF is used in kernel, right? If I'm missing something please correct me. Tiejun MMIO. The hypervisor should deny execute access on pages marked with VF. Accordingly in this case guest ISI exceptions should be handled by the hypervisor. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts
On 05/09/2013 07:21 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: tiejun.chen [mailto:tiejun.c...@windriver.com] Sent: Thursday, May 09, 2013 3:48 PM To: Bhushan Bharat-R65777 Cc: Caraman Mihai Claudiu-B02008; Wood Scott-B07421; linuxppc- d...@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts On 05/09/2013 06:00 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: tiejun.chen [mailto:tiejun.c...@windriver.com] Sent: Thursday, May 09, 2013 3:15 PM To: Bhushan Bharat-R65777 Cc: Caraman Mihai Claudiu-B02008; Wood Scott-B07421; linuxppc- d...@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts On 05/09/2013 04:23 PM, Bhushan Bharat-R65777 wrote: -Original Message- From: Linuxppc-dev [mailto:linuxppc-dev- bounces+bharat.bhushan=freescale@lists.ozlabs.org] On Behalf Of bounces+Caraman Mihai Claudiu-B02008 Sent: Wednesday, May 08, 2013 6:44 PM To: Wood Scott-B07421; tiejun.chen Cc: linuxppc-dev@lists.ozlabs.org; ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: RE: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts This only disable soft interrupt for kvmppc_restart_interrupt() that restarts interrupts if they were meant for the host: a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL Those aren't the only exceptions that can end up going to the host. We could get a TLB miss that results in a heavyweight MMIO exit, etc. And shouldn't we handle kvmppc_restart_interrupt() like the original HOST flow? #define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \ ... Could you elaborate on what you mean? I think Tiejun was saying that host has flags and replays only EE/DEC/DBELL interrupts. There is special macro masked_interrupt_book3e in those exception handlers that sets paca- irq_happened. The list of replied interrupts is limited to asynchronous noncritical interrupts which can be masked by MSR[EE] (therefore no TLB miss). Now on KVM book3e we don't want to put them in the irq_happened lazy state but rather to execute them directly, so there is no reason for exception handling symmetry between host and guest. Another Question: The case is: Actually in the case GS=1 even if EE=0, EXT/DEC/DBELL still occur as I recall. Case 1) - Local_irq_disable() will set soft_enabled = 0 - Now Externel interrupt happens, there we set PACA_IRQ_EE in irq_happened, Also clears EE in SRR1 and rfi. So interrupts are hard disabled. No more other interrupt gated by MSR.EE can happen. Looks like the idea here is to not let a device keep on inserting interrupt till the interrupt condition on device is cleared, right? I don't understand the interrupt condition on device is cleared here. I think regardless if you clear the device interrupt status, the system still receive a pending interrupt once EE or GS = 1. Once yes, but I think to avoid flood of device interrupt we disable MSR.EE when soft-disabled. But we neither ACK nor send EOI to that irq in the interrupt controller, so that should be in pending state. - local_irq_enable() - This checks that irq_happened is set, and replays ret_from_except also check to replay. Now the case 2) Case 2) - Local_irq_disable() will set soft_enabled = 0 - Now DEC interrupt happens. We set PACA_IRQ_DEC in irq_happened, But do not clear EE in SRR1 and rfi. So interrupts are not hard disabled. - Now say EE interrupt happens, there we set PACA_IRQ_EE in irq_happened, Also clears EE in SRR1 and rfi. So interrupts are hard disabled. - local_irq_enable() - This checks that irq_happened is set. IIUC, it replays only one interrupt? is not it? After anyone is replayed in arch_local_irq_restore(), we will set soft/hard interrupt there: set_soft_enabled(1); __hard_irq_enable(); Then any pending interrupt can be executed now. Do you mean that the interrupt should fire again? I means the pending exception including external interrupt, the decrementer exception and the doorbell exception, can trap CPU once EE=1 with __hard_irq_enable() here. Then the kernel can handle those exception since soft enable is also 1 now. Additionally, ret_from_except probably check to replay all. Local_irq_enable() will not take us to ret_from_except. Yes. I just say ret_from_except can provide an approach to replay all :) __replay_interrupt() from arch_local_irq_enable() will take us to ret_from_except/lite :) There all pending interrupts
Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest
On 05/09/2013 07:34 PM, Caraman Mihai Claudiu-B02008 wrote: VF stands for virtualization fault see MAS8[VF] and we may use it for virtualized Looks KVM PPC have no this mechanism currently since I don't find MAS8_VF is used in kernel, right? Yes but 'we may use it' in the feature, I have a functional POC with VF. Any IO performance to be improved with this POC? Now we capture virtualized MMIO accesses as TLB misses (we don't write into HW TLB guest translation outside visible memslots). Yes. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 3/4] kvm/ppc: Call trace_hardirqs_on before entry
On 05/10/2013 11:34 AM, Bhushan Bharat-R65777 wrote: -Original Message- From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf Of Scott Wood Sent: Friday, May 10, 2013 8:40 AM To: Alexander Graf; Benjamin Herrenschmidt Cc: kvm-...@vger.kernel.org; k...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421 Subject: [PATCH v2 3/4] kvm/ppc: Call trace_hardirqs_on before entry Currently this is only being done on 64-bit. Rather than just move it out of the 64-bit ifdef, move it to kvm_lazy_ee_enable() so that it is consistent with lazy ee state, and so that we don't track more host code as interrupts-enabled than necessary. Rename kvm_lazy_ee_enable() to kvm_fix_ee_before_entry() to reflect that this function now has a role on 32-bit as well. Signed-off-by: Scott Wood scottw...@freescale.com --- arch/powerpc/include/asm/kvm_ppc.h | 11 --- arch/powerpc/kvm/book3s_pr.c |4 ++-- arch/powerpc/kvm/booke.c |4 ++-- arch/powerpc/kvm/powerpc.c |2 -- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index a5287fe..6885846 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -394,10 +394,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn) } } -/* Please call after prepare_to_enter. This function puts the lazy ee state - back to normal mode, without actually enabling interrupts. */ -static inline void kvmppc_lazy_ee_enable(void) +/* + * Please call after prepare_to_enter. This function puts the lazy ee and irq + * disabled tracking state back to normal mode, without actually enabling + * interrupts. + */ +static inline void kvmppc_fix_ee_before_entry(void) { + trace_hardirqs_on(); + #ifdef CONFIG_PPC64 /* Only need to enable IRQs by hard enabling them after this */ local_paca-irq_happened = 0; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index bdc40b8..0b97ce4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -890,7 +890,7 @@ program_interrupt: local_irq_enable(); r = s; } else { - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); } } @@ -1161,7 +1161,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (vcpu-arch.shared-msr MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); ret = __kvmppc_vcpu_run(kvm_run, vcpu); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 705fc5c..eb89b83 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -673,7 +673,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) ret = s; goto out; } - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); local_irq_disable() is called before kvmppc_prepare_to_enter(). In patch 4, we call hard_irq_disable() once enter kvmppc_prepare_to_enter(). Tiejun Now we put the irq_happend and soft_enabled back to previous state without checking for any interrupt happened in between. If any interrupt happens in between, will not that be lost? -Bharat kvm_guest_enter(); @@ -1154,7 +1154,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, local_irq_enable(); r = (s 2) | RESUME_HOST | (r RESUME_FLAG_NV); } else { - kvmppc_lazy_ee_enable(); + kvmppc_fix_ee_before_entry(); } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6316ee3..4e05f8c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -117,8 +117,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) kvm_guest_exit(); continue; } - - trace_hardirqs_on(); #endif kvm_guest_enter(); -- 1.7.10.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 4/4] kvm/ppc: IRQ disabling cleanup
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4e05f8c..f8659aa 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -64,12 +64,14 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) { int r = 1; - WARN_ON_ONCE(!irqs_disabled()); + WARN_ON(irqs_disabled()); + hard_irq_disable(); Here we hard disable in kvmppc_prepare_to_enter(), so my comment in other patch about interrupt loss is no more valid. So here MSR.EE = 0 local_paca-soft_enabled = 0 local_paca-irq_happened |= PACA_IRQ_HARD_DIS; + while (true) { if (need_resched()) { local_irq_enable(); This will make the state: MSR.EE = 1 local_paca-soft_enabled = 1 local_paca-irq_happened = PACA_IRQ_HARD_DIS; //same as before Why is this same the above state? local_irq_enable() can call __check_irq_replay() to clear PACA_IRQ_HARD_DIS. Is that a valid state where interrupts are fully enabled and irq_happend in not 0? cond_resched(); - local_irq_disable(); + hard_irq_disable(); continue; } @@ -95,7 +97,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) local_irq_enable(); trace_kvm_check_requests(vcpu); r = kvmppc_core_check_requests(vcpu); - local_irq_disable(); + hard_irq_disable(); if (r 0) continue; break; @@ -108,21 +110,14 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) } #ifdef CONFIG_PPC64 - /* lazy EE magic */ - hard_irq_disable(); - if (lazy_irq_pending()) { - /* Got an interrupt in between, try again */ - local_irq_enable(); - local_irq_disable(); - kvm_guest_exit(); - continue; - } + WARN_ON(lazy_irq_pending()); #endif kvm_guest_enter(); - break; + return r; } + local_irq_enable(); return r; } int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) { int r = 0; WARN_ON_ONCE(!irqs_disabled()); kvmppc_core_check_exceptions(vcpu); if (vcpu-requests) { /* Exception delivery raised request; start over */ return 1; } if (vcpu-arch.shared-msr MSR_WE) { local_irq_enable(); kvm_vcpu_block(vcpu); clear_bit(KVM_REQ_UNHALT, vcpu-requests); local_irq_disable(); ^^^ We do not require hard_irq_disable() here? Between kvmppc_core_prepare_to_enter() and kvmppc_prepare_to_enter(), as I recall Scott had some discussions with Ben earlier. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest
On 05/08/2013 05:20 PM, Caraman Mihai Claudiu-B02008 wrote: -Original Message- From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf Of tiejun.chen Sent: Wednesday, May 08, 2013 4:54 AM To: Wood Scott-B07421 Cc: ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org; linuxppc-dev@lists.ozlabs.org Subject: Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest On 05/08/2013 07:40 AM, Scott Wood wrote: On 05/07/2013 06:06:30 AM, Tiejun Chen wrote: We also can direct ISI exception to Guest like DSI. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kvm/booke_emulate.c |3 +++ arch/powerpc/kvm/e500mc.c|3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) Are you seeing a real performance improvement from this? This will interfere No. But after we reduce the exit to host, shouldn't this improve performance? We lose some flexibility for this so it make sense only if we gain measurable improvements. Sounds we have much more works to do. somewhat with using the VF bit, if we were to ever do so, since VF only affects Sorry, what is the VF you said? VF stands for virtualization fault see MAS8[VF] and we may use it for virtualized I almost forget this point :) MMIO. The hypervisor should deny execute access on pages marked with VF. Accordingly in this case guest ISI exceptions should be handled by the hypervisor. Thanks for your information. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: Make hard_irq_disable() do the right thing vs. irq tracing
On 05/07/2013 03:04 PM, Benjamin Herrenschmidt wrote: If hard_irq_disable() is called while interrupts are already soft-disabled (which is the most common case) all is already well. However you can (and in some cases want) to call it while everything is enabled (to make sure you don't get a lazy even, for example before entry into KVM guests) and in this case we need to inform the irq tracer that the irqs are going off. We have to change the inline into a macro to avoid an include circular dependency hell hole. Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org --- Tested on pseries, Scott, I don't expect a problem with that patch especially since most callers already are soft disabled, so I'll merge it now along with my other pending stuff and you can simplify your KVM one. diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index e45c494..d615b28 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -95,15 +95,13 @@ static inline bool arch_irqs_disabled(void) #define __hard_irq_disable() __mtmsrd(local_paca-kernel_msr, 1) #endif -static inline void hard_irq_disable(void) -{ - __hard_irq_disable(); - get_paca()-soft_enabled = 0; - get_paca()-irq_happened |= PACA_IRQ_HARD_DIS; -} - -/* include/linux/interrupt.h needs hard_irq_disable to be a macro */ -#define hard_irq_disable hard_irq_disable +#define hard_irq_disable() do {\ + __hard_irq_disable(); \ + if (local_paca-soft_enabled)\ + trace_hardirqs_off(); \ + get_paca()-soft_enabled = 0;\ Could we simplify this as follows: +#define hard_irq_disable() do {\ + __hard_irq_disable(); \ + if (get_paca()-soft_enabled) { \ + trace_hardirqs_off(); \ + get_paca()-soft_enabled = 0;\ + } \ + get_paca()-irq_happened |= PACA_IRQ_HARD_DIS; \ +} while(0) Tiejun + get_paca()-irq_happened |= PACA_IRQ_HARD_DIS; \ +} while(0) static inline bool lazy_irq_pending(void) { ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v1][KVM][PATCH 1/1] kvm:ppc: enable doorbell exception with E500MC
On 05/07/2013 09:33 AM, tiejun.chen wrote: On 05/06/2013 10:58 PM, Alexander Graf wrote: On 05/06/2013 04:53 AM, Tiejun Chen wrote: Actually E500MC also support doorbell exception, and CONFIG_PPC_E500MC can cover BOOK3E/BOOK3E_64 as well. Signed-off-by: Tiejun Chentiejun.c...@windriver.com --- arch/powerpc/kvm/booke.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1020119..dc1f590 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -795,7 +795,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, kvmppc_fill_pt_regs(regs); timer_interrupt(regs); break; -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64) +#if defined(CONFIG_PPC_E500MC) I suppose you mean CONFIG_KVM_E500MC here? Why didn't this work for you before? This works for me. Here I just mean currently CONFIG_PPC_E500MC is always selected no matter what CONFIG_PPC_FSL_BOOK3E or CONFIG_PPC_BOOK3E_64 is enabled. And especially, this is already in the arch/powerpc/kvm/booke.c file, so I think one #ifdef (CONFIG_PPC_E500MC) is enough and also makes sense. The ifdef above should cover the same range of CPUs. Or furthermore, the #ifdef CONFIG_PPC_DOORBELL is reasonable to cover this. I think this may be better so I send next version with this. So please take a look at. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v1][KVM][PATCH 1/1] kvm:ppc:booehv: direct ISI exception to Guest
On 05/08/2013 07:40 AM, Scott Wood wrote: On 05/07/2013 06:06:30 AM, Tiejun Chen wrote: We also can direct ISI exception to Guest like DSI. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kvm/booke_emulate.c |3 +++ arch/powerpc/kvm/e500mc.c|3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) Are you seeing a real performance improvement from this? This will interfere No. But after we reduce the exit to host, shouldn't this improve performance? somewhat with using the VF bit, if we were to ever do so, since VF only affects Sorry, what is the VF you said? Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v1][KVM][PATCH 1/1] kvm:ppc: enable doorbell exception with E500MC
On 05/06/2013 10:58 PM, Alexander Graf wrote: On 05/06/2013 04:53 AM, Tiejun Chen wrote: Actually E500MC also support doorbell exception, and CONFIG_PPC_E500MC can cover BOOK3E/BOOK3E_64 as well. Signed-off-by: Tiejun Chentiejun.c...@windriver.com --- arch/powerpc/kvm/booke.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1020119..dc1f590 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -795,7 +795,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, kvmppc_fill_pt_regs(regs); timer_interrupt(regs); break; -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3E_64) +#if defined(CONFIG_PPC_E500MC) I suppose you mean CONFIG_KVM_E500MC here? Why didn't this work for you before? This works for me. Here I just mean currently CONFIG_PPC_E500MC is always selected no matter what CONFIG_PPC_FSL_BOOK3E or CONFIG_PPC_BOOK3E_64 is enabled. And especially, this is already in the arch/powerpc/kvm/booke.c file, so I think one #ifdef (CONFIG_PPC_E500MC) is enough and also makes sense. The ifdef above should cover the same range of CPUs. Or furthermore, the #ifdef CONFIG_PPC_DOORBELL is reasonable to cover this. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts
On 05/07/2013 07:50 AM, Scott Wood wrote: On 05/05/2013 10:13:17 PM, tiejun.chen wrote: On 05/06/2013 11:10 AM, Tiejun Chen wrote: For the external interrupt, the decrementer exception and the doorbell excpetion, we also need to soft-disable interrupts while doing as host interrupt handlers since the DO_KVM hook is always performed to skip EXCEPTION_COMMON then miss this original chance with the 'ints' (INTS_DISABLE). http://patchwork.ozlabs.org/patch/241344/ http://patchwork.ozlabs.org/patch/241412/ :-) I'm observing the same behaviour as well: WARN_ON_ONCE(!irqs_disabled()); Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kvm/bookehv_interrupts.S |9 + 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e8ed7d6..2fd62bf 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -33,6 +33,8 @@ #ifdef CONFIG_64BIT #include asm/exception-64e.h +#include asm/hw_irq.h +#include asm/irqflags.h #else #include ../kernel/head_booke.h /* for THREAD_NORMSAVE() */ #endif @@ -469,6 +471,13 @@ _GLOBAL(kvmppc_resume_host) PPC_LLr3, HOST_RUN(r1) mrr5, r14 /* intno */ mrr14, r4 /* Save vcpu pointer. */ +#ifdef CONFIG_64BIT +/* Should we soft-disable interrupts? */ +andi.r6, r5, BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL +beqskip_soft_dis +SOFT_DISABLE_INTS(r7,r8) +skip_soft_dis: +#endif Why wouldn't we always disable them? kvmppc_handle_exit() will enable interrupts when it's ready. This only disable soft interrupt for kvmppc_restart_interrupt() that restarts interrupts if they were meant for the host: a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL b. bl kvmppc_handle_exit c. kvmppc_handle_exit() { int r = RESUME_HOST; int s; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); /* restart interrupts if they were meant for the host */ kvmppc_restart_interrupt(vcpu, exit_nr); local_irq_enable(); == Enable again. And shouldn't we handle kvmppc_restart_interrupt() like the original HOST flow? #define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \ ... So I think this should be reasonable :) Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts
On 05/07/2013 10:06 AM, Scott Wood wrote: On 05/06/2013 08:56:25 PM, tiejun.chen wrote: On 05/07/2013 07:50 AM, Scott Wood wrote: On 05/05/2013 10:13:17 PM, tiejun.chen wrote: On 05/06/2013 11:10 AM, Tiejun Chen wrote: For the external interrupt, the decrementer exception and the doorbell excpetion, we also need to soft-disable interrupts while doing as host interrupt handlers since the DO_KVM hook is always performed to skip EXCEPTION_COMMON then miss this original chance with the 'ints' (INTS_DISABLE). http://patchwork.ozlabs.org/patch/241344/ http://patchwork.ozlabs.org/patch/241412/ :-) I'm observing the same behaviour as well: WARN_ON_ONCE(!irqs_disabled()); So, could you explain the benefits of your approach over what's being discussed in those threads? They're a long thread so I think I need to take time to see :) Why wouldn't we always disable them? kvmppc_handle_exit() will enable interrupts when it's ready. This only disable soft interrupt for kvmppc_restart_interrupt() that restarts interrupts if they were meant for the host: a. SOFT_DISABLE_INTS() only for BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL Those aren't the only exceptions that can end up going to the host. We could get a TLB miss that results in a heavyweight MMIO exit, etc. This is like host handler, so I'm just disabling soft interrupt during kvmppc_restart_interrupt() for Doorbell interrupt/Decrementer Interrupt/External Input Interrupt. I don't see anything should be disabled for any TLB exception in host handler. And I'd rather see any fix for this problem stay out of the asm code. We already have an appropriate SOFT_DISABLE_INTS so I think we can take this easily :) b. bl kvmppc_handle_exit c. kvmppc_handle_exit() { int r = RESUME_HOST; int s; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); /* restart interrupts if they were meant for the host */ kvmppc_restart_interrupt(vcpu, exit_nr); local_irq_enable();== Enable again. And shouldn't we handle kvmppc_restart_interrupt() like the original HOST flow? #define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \ ... Could you elaborate on what you mean? In host handler, we always use MASKABLE_EXCEPTION() to define-to-handle some exceptions: Doorbell interrupt/Decrementer Interrupt/External Input Interrupt: #define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack) \ START_EXCEPTION(label); \ NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\ EXCEPTION_COMMON(trapnum, PACA_EXGEN, *INTS_DISABLE*) \ This would call INTS_DISABLE, which is equal to SOFT_DISABLE_INTS(), to disable soft interrupt before call all associated handlers: do_IRQ()/timer_interrupt()/doorbell_exception(). But DO_KVM hook always skips INTS_DISABLE. So I think we also need to do INTS_DISABLE for kvmppc_restart_interrupt() since actually that restarts interrupts for the host with a similar way as they are called by host. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC][KVM][PATCH 1/1] kvm:ppc:booke-64: soft-disable interrupts
On 05/06/2013 11:10 AM, Tiejun Chen wrote: For the external interrupt, the decrementer exception and the doorbell excpetion, we also need to soft-disable interrupts while doing as host interrupt handlers since the DO_KVM hook is always performed to skip EXCEPTION_COMMON then miss this original chance with the 'ints' (INTS_DISABLE). Sorry, miss to send Ben. Tiejun Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kvm/bookehv_interrupts.S |9 + 1 file changed, 9 insertions(+) diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e8ed7d6..2fd62bf 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -33,6 +33,8 @@ #ifdef CONFIG_64BIT #include asm/exception-64e.h +#include asm/hw_irq.h +#include asm/irqflags.h #else #include ../kernel/head_booke.h /* for THREAD_NORMSAVE() */ #endif @@ -469,6 +471,13 @@ _GLOBAL(kvmppc_resume_host) PPC_LL r3, HOST_RUN(r1) mr r5, r14 /* intno */ mr r14, r4 /* Save vcpu pointer. */ +#ifdef CONFIG_64BIT + /* Should we soft-disable interrupts? */ + andi. r6, r5, BOOKE_INTERRUPT_EXTERNAL | BOOKE_INTERRUPT_DECREMENTER | BOOKE_INTERRUPT_DOORBELL + beq skip_soft_dis + SOFT_DISABLE_INTS(r7,r8) +skip_soft_dis: +#endif bl kvmppc_handle_exit /* Restore vcpu pointer and the nonvolatiles we used. */ ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/1] kvm:book3e: Fix a build error
On 04/25/2013 08:11 PM, Caraman Mihai Claudiu-B02008 wrote: -Original Message- From: kvm-ppc-ow...@vger.kernel.org [mailto:kvm-ppc- ow...@vger.kernel.org] On Behalf Of Tiejun Chen Sent: Thursday, April 25, 2013 2:46 PM To: ga...@kernel.crashing.org Cc: linuxppc-dev@lists.ozlabs.org; kvm-...@vger.kernel.org; k...@vger.kernel.org Subject: [PATCH 1/1] kvm:book3e: Fix a build error Commit cd66cc2e, powerpc/85xx: Add AltiVec support for e6500, adds support for AltiVec on a Book-E class processor, but while compiling in the CONFIG_PPC_BOOK3E_64 and CONFIG_VIRTUALIZATION case, this introduce the following error: arch/powerpc/kernel/exceptions-64e.S:402: undefined reference to `kvmppc_handler_42_0x01B' arch/powerpc/kernel/built-in.o: In function `exc_altivec_assist_book3e': arch/powerpc/kernel/exceptions-64e.S:424: undefined reference to `kvmppc_handler_43_0x01B' make: *** [vmlinux] Error 1 Looks we should add these altivec kvm handlers. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kvm/bookehv_interrupts.S |5 + 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e8ed7d6..fa9c78a 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -319,6 +319,11 @@ kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \ SPRN_DSRR0, SPRN_DSRR1, 0 kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \ SPRN_CSRR0, SPRN_CSRR1, 0 +/* altivec */ +kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 +kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \ + SPRN_SRR0, SPRN_SRR1, 0 #else /* * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h -- It seems that you are not using kvm-ppc-queue branch. This is just used to fix a build error in powerpc.git when introduce commit cd66cc2e, powerpc/85xx: Add AltiVec support for e6500, in *powerpc.git* as I mentioned in this patch head. I already have a patch ready for this (and AltiVec support is work This change don't block your AltiVec support for kvm unless you think this change is wrong. And especially, we always can reproduce this error with/without enabling AltiVec, so I also don't think this should be suspended until support e6500 in kvm since kvm based on e5500 should work. Tiejun in progress) but we need first to pull e6500 kernel patches from Linux tree into agraf.git. -Mike ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/1] usb: ehci-fsl: set INCR8 mode only on MPC512x
On 04/24/2013 01:55 PM, Anatolij Gustschin wrote: On Wed, 24 Apr 2013 10:55:10 +0800 Tiejun Chen tiejun.c...@windriver.com wrote: commit 761bbcb7, usb: ehci-fsl: set INCR8 mode for system bus interface on MPC512x, introduced to fix one MPC5121e (M36P) Errata by setting INCR8 mode for system bus interface on MPC512x, but we should make sure this is only valid for MPC512x like other parts of this commit. Otherwise NAK. It is already only valid for MPC512x. this would issue other platforms as abnormal without this similar Errata. This setting is in the ehci_fsl_mpc512x_drv_resume() function which is not called on other platforms. Yes, I already notice this and also send a notification to ignore this improper patch immediately ;-) Thanks, Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/1] usb: ehci-fsl: set INCR8 mode only on MPC512x
Sorry, please ignore this temporarily since looks this is already covered in tree. I will look further into this to send another version. Tiejun On 04/24/2013 10:55 AM, Tiejun Chen wrote: commit 761bbcb7, usb: ehci-fsl: set INCR8 mode for system bus interface on MPC512x, introduced to fix one MPC5121e (M36P) Errata by setting INCR8 mode for system bus interface on MPC512x, but we should make sure this is only valid for MPC512x like other parts of this commit. Otherwise this would issue other platforms as abnormal without this similar Errata. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- drivers/usb/host/ehci-fsl.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index d81d2fc..f4f2a7b 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -509,7 +509,15 @@ static int ehci_fsl_mpc512x_drv_resume(struct device *dev) ehci_writel(ehci, ISIPHYCTRL_PXE | ISIPHYCTRL_PHYE, hcd-regs + FSL_SOC_USB_ISIPHYCTRL); - ehci_writel(ehci, SBUSCFG_INCR8, hcd-regs + FSL_SOC_USB_SBUSCFG); + if (of_device_is_compatible(dev-parent-of_node, + fsl,mpc5121-usb2-dr)) { + /* +* set SBUSCFG:AHBBRST so that control msgs don't +* fail when doing heavy PATA writes. +*/ + ehci_writel(ehci, SBUSCFG_INCR8, + hcd-regs + FSL_SOC_USB_SBUSCFG); + } /* restore EHCI registers */ ehci_writel(ehci, pdata-pm_command, ehci-regs-command); ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: add a missing label in resume_kernel
On 04/10/2013 04:31 PM, Kevin Hao wrote: A label 0 was missed in the patch a9c4e541 (powerpc/kprobe: Complete kprobe and migrate exception frame). This will cause the kernel branch to an undetermined address if there really has a conflict when updating the thread flags. Signed-off-by: Kevin Hao haoke...@gmail.com Acked-By: Tiejun Chen tiejun.c...@windriver.com Cc: sta...@vger.kernel.org --- arch/powerpc/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 256c5bf..ab079ed 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -657,7 +657,7 @@ resume_kernel: /* Clear _TIF_EMULATE_STACK_STORE flag */ lis r11,_TIF_EMULATE_STACK_STORE@h addir5,r9,TI_FLAGS - ldarx r4,0,r5 +0: ldarx r4,0,r5 andcr4,r4,r11 stdcx. r4,0,r5 bne-0b ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH]-[PATCH v2] kgdb: Removed kmalloc returned value cast
On 03/11/2013 09:33 AM, tiejun.chen wrote: On 03/11/2013 06:39 AM, Alex Grad wrote: While at it, check kmalloc return value. Signed-off-by: Alex Grad alex.g...@gmail.com --- arch/powerpc/kernel/kgdb.c |5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 5ca82cd..9e81dd8 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -159,7 +159,10 @@ static int kgdb_singlestep(struct pt_regs *regs) if (user_mode(regs)) return 0; -backup_current_thread_info = (struct thread_info *)kmalloc(sizeof(struct thread_info), GFP_KERNEL); +backup_current_thread_info = kmalloc(sizeof(struct thread_info), GFP_KERNEL); +if (!backup_current_thread_info) +return -ENOMEM; I already send a kgdb patchset in [v3][PATCH 6/6] powerpc/kgdb: remove copying the thread_info to remove these stuff since its unnecessary to copy current thread_info now. I guess that patchset needs to be reviewed with more time and I also think I can leave this copying to be compatible for other platforms in the future, but we really should fix this problem I introduced right now. So I'd like to fix this with DEFINE_PER_CPU. I send a patch, powerpc/kgdb: use DEFINE_PER_CPU to allocate kgdb's thread_info, so please take a look at if that version is fine to every one. Any comments are appreciated. BTW, I will update that kgdb patchset with this change to resend. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH]-[PATCH v2] kgdb: Removed kmalloc returned value cast
On 03/11/2013 06:39 AM, Alex Grad wrote: While at it, check kmalloc return value. Signed-off-by: Alex Grad alex.g...@gmail.com --- arch/powerpc/kernel/kgdb.c |5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 5ca82cd..9e81dd8 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -159,7 +159,10 @@ static int kgdb_singlestep(struct pt_regs *regs) if (user_mode(regs)) return 0; - backup_current_thread_info = (struct thread_info *)kmalloc(sizeof(struct thread_info), GFP_KERNEL); + backup_current_thread_info = kmalloc(sizeof(struct thread_info), GFP_KERNEL); + if (!backup_current_thread_info) + return -ENOMEM; I already send a kgdb patchset in [v3][PATCH 6/6] powerpc/kgdb: remove copying the thread_info to remove these stuff since its unnecessary to copy current thread_info now. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: kernel/kgdb.c: fix memory leakage
On 02/08/2013 10:41 AM, Benjamin Herrenschmidt wrote: On Thu, 2013-01-31 at 20:04 -0600, Jason Wessel wrote: diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 8747447..5ca82cd 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -154,12 +154,12 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; - struct thread_info *backup_current_thread_info = \ - (struct thread_info *)kmalloc(sizeof(struct thread_info), GFP_KERNEL); + struct thread_info *backup_current_thread_info; Woh... This is definitely wrong. You have found a problem for sure, but this is not the right way to fix it. It is not a good idea to kmalloc while single stepping because you can hang the kernel if you single step any operation in kmalloc(). Ok. I applied the fix because it was obviously correct vs. the previous version of the code (ie. the kmalloc was already there) I am in the process of going through all the kgdb mails from the last few months while I had been away from the project, so I didn't catch this one and I see it has upstream commit (fefd9e6f8). I'll submit another patch to fix this the right way and use a static variable. This is ok to use a static variable here because this is not something we can recursively call at a single CPU level. But a static will be shared by all CPUs or do you mean a per-cpu one ? If Ben prefers we not burn the memory unless kgdb is active we can kmalloc / kfree the space we need at the time that kgdb is initialized. Else we can go with this patch you see below. We'll see what Ben desires. What about the stack ? thread info isn't very big... Actually booke already copy the thread_info when we enter the debug exception, and I will send a patch to do the same thing for book3e, so I also remove these copying action here then we're happy without these nasty stuff :) Tiejun Cheers, Ben. - diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index a7bc752..bb12c8b 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -151,15 +151,16 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) return 1; } +static struct thread_info kgdb_backup_thread_info[NR_CPUS]; + static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; - struct thread_info *backup_current_thread_info; + int cpu = raw_smp_processor_id(); if (user_mode(regs)) return 0; - backup_current_thread_info = (struct thread_info *)kmalloc(sizeof(struct thread_info), GFP_KERNEL); /* * On Book E and perhaps other processors, singlestep is handled on * the critical exception stack. This causes current_thread_info() @@ -175,7 +176,7 @@ static int kgdb_singlestep(struct pt_regs *regs) if (thread_info != exception_thread_info) { /* Save the original current_thread_info. */ - memcpy(backup_current_thread_info, exception_thread_info, sizeof *thread_info); + memcpy(kgdb_backup_thread_info[cpu], exception_thread_info, sizeof *thread_info); memcpy(exception_thread_info, thread_info, sizeof *thread_info); } @@ -183,9 +184,8 @@ static int kgdb_singlestep(struct pt_regs *regs) if (thread_info != exception_thread_info) /* Restore current_thread_info lastly. */ - memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info); + memcpy(exception_thread_info, kgdb_backup_thread_info[cpu], sizeof *thread_info); - kfree(backup_current_thread_info); return 1; } - Thanks, Jason. if (user_mode(regs)) return 0; + backup_current_thread_info = (struct thread_info *)kmalloc(sizeof(struct thread_info), GFP_KERNEL); /* * On Book E and perhaps other processors, singlestep is handled on * the critical exception stack. This causes current_thread_info() @@ -185,6 +185,7 @@ static int kgdb_singlestep(struct pt_regs *regs) /* Restore current_thread_info lastly. */ memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info); + kfree(backup_current_thread_info); return 1; } ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 0/6] powerpc/book3e: make kgdb to work well
On 02/27/2013 11:04 AM, Tiejun Chen wrote: This patchset is used to support kgdb/gdb on book3e. Validated on p4080ds and p5040ds with test single step and breakpoint Please ignore this thread since looks I'm missing to CC Jason :( Tiejun v3: * make work when enable CONFIG_RELOCATABLE * fix one typo in patch, powerpc/book3e: store critical/machine/debug exception thread info: ld r1,PACAKSAVE(r13); - ld r14,PACAKSAVE(r13); * remove copying the thread_info since booke and book3e always copy the thead_info now when we enter the debug exception, and so drop the v2 patch, book3e/kgdb: Fix a single stgep case of lazy IRQ v2: * Make sure we cover CONFIG_PPC_BOOK3E_64 safely * Use LOAD_REG_IMMEDIATE() to load properly the value of the constant expression in load debug exception stack * Copy thread infor form the kernel stack coming from usr * Rebase latest powerpc git tree v1: * Copy thread info only when we are from !user mode since we'll get kernel stack coming from usr directly. * remove save/restore EX_R14/EX_R15 since DBG_EXCEPTION_PROLOG already covered this. * use CURRENT_THREAD_INFO() conveniently to get thread. * fix some typos * add a patch to make sure gdb can generate a single step properly to invoke a kgdb state. * add a patch to if we need to replay an interrupt, we shouldn't restore that previous backup thread info to make sure we can replay an interrupt lately with a proper thread info. * rebase latest powerpc git tree v0: This patchset is used to support kgdb for book3e. -- Tiejun Chen (6): powerpc/book3e: load critical/machine/debug exception stack powerpc/book3e: store critical/machine/debug exception thread info book3e/kgdb: update thread's dbcr0 powerpc/book3e: support kgdb for kernel space kgdb/kgdbts: support ppc64 powerpc/kgdb: remove copying the thread_info arch/powerpc/kernel/exceptions-64e.S | 69 -- arch/powerpc/kernel/kgdb.c | 41 +--- drivers/misc/kgdbts.c|2 + 3 files changed, 77 insertions(+), 35 deletions(-) Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [RFC][v0][PATCH 1/1] ppc64: ignore interrupts while offline
Please ignore this to check next version. Tiejun On 01/15/2013 06:31 PM, Tiejun Chen wrote: With lazy interrupt, some implementations of hotplug will get some interrupts even while offline, just ignore these. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/irq.c |6 ++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4f97fe3..dbca574 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -144,6 +144,12 @@ notrace unsigned int __check_irq_replay(void) */ unsigned char happened = local_paca-irq_happened; + /* Some implementations of hotplug will get some interrupts while +* offline, just ignore these. +*/ + if (cpu_is_offline(smp_processor_id())) + return 0; + /* Clear bit 0 which we wouldn't clear otherwise */ local_paca-irq_happened = ~PACA_IRQ_HARD_DIS; ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/3] powerpc/book3e: load critical/machine/debug exception stack
On 12/19/2012 06:10 AM, Tabi Timur-B04825 wrote: On Thu, Oct 25, 2012 at 1:43 AM, Tiejun Chen tiejun.c...@windriver.com wrote: We always alloc critical/machine/debug check exceptions. This is different from the normal exception. So we should load these exception stack properly like we did for booke. Tiejun, I'm a little confused by these patches, because the actual critical exception handlers are still commented out: /* Critical Input Interrupt */ START_EXCEPTION(critical_input); CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL, PROLOG_ADDITION_NONE) // EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE) // bl special_reg_save_crit // CHECK_NAPPING(); // addir3,r1,STACK_FRAME_OVERHEAD // bl .critical_exception // b ret_from_crit_except b . Are you working on fixing this? I'm trying to fix it, too, but I No, I have no approach to Crit/MC and currently I'm focus only on Debug so please do this as you expect :) BTW, I send next version just now since there are something needed to correct/improve. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v1][PATCH 3/6] book3e/kgdb: update thread's dbcr0
On 12/21/2012 02:41 AM, Kumar Gala wrote: On Dec 20, 2012, at 3:08 AM, Tiejun Chen wrote: gdb always need to generate a single step properly to invoke a kgdb state. But with lazy interrupt, book3e can't always trigger a debug exception with a single step since the current is blocked for handling those pending exception, then we miss that expected dbcr configuration at last to generate a debug exception. So here we also update thread's dbcr0 to make sure the current can go back with that missed dbcr0 configuration. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/kgdb.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index c470a40..516b44b 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -426,8 +426,18 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, /* set the trace bit if we're stepping */ if (remcom_in_buffer[0] == 's') { #ifdef CONFIG_PPC_ADV_DEBUG_REGS +#ifdef CONFIG_PPC_BOOK3E Should this really be CONFIG_PPC64 or CONFIG_PPC_BOOK3E_64? Yes, I think CONFIG_PPC_BOOK3E_64 is better currently. Because I didn't validate this on power arch since I have no a real machine :) + /* With lazy interrut we have to update thread dbcr0 here +* to make sure we can set debug properly at last to invoke +* kgdb again to work well. +*/ + current-thread.dbcr0 = + mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM; + mtspr(SPRN_DBCR0, current-thread.dbcr0); +#else mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); +#endif Can we code this so we don't need the #else? I guess you're saying something like this: --- diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index c470a40..b2df42a 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -409,7 +409,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, struct pt_regs *linux_regs) { char *ptr = remcom_in_buffer[1]; - unsigned long addr; + unsigned long addr, dbcr0; switch (remcom_in_buffer[0]) { /* @@ -426,8 +426,15 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, /* set the trace bit if we're stepping */ if (remcom_in_buffer[0] == 's') { #ifdef CONFIG_PPC_ADV_DEBUG_REGS - mtspr(SPRN_DBCR0, - mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); + dbcr0 = mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM; + mtspr(SPRN_DBCR0, dbcr0); +#ifdef CONFIG_PPC_BOOK3E_64 + /* With lazy interrut we have to update thread dbcr0 here +* to make sure we can set debug properly at last to invoke +* kgdb again to work well. +*/ + current-thread.dbcr0 = dbcr0; +#endif linux_regs-msr |= MSR_DE; #else linux_regs-msr |= MSR_SE; -- 1.7.9.5 Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v2][PATCH 1/2] powerpc/kgdb: Fix a single stgep case of lazy IRQ
Please ignore v2 temporarily since I have to correct something lately. Sorry for any inconvenience. Tiejun On 10/18/2012 10:47 AM, Tiejun Chen wrote: When we're in kgdb_singlestep(), we have to work around to get thread_info by copying from the kernel stack before calling kgdb_handle_exception(), then copying it back afterwards. But for PPC64, we have a lazy interrupt implementation. So after copying thread info frome kernle stack, if we need to replay an interrupt, we shouldn't restore that previous backup thread info to make sure we can replay an interrupt lately with a proper thread info. This patch use __check_irq_replay() to guarantee this process. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/irq.c | 12 +++- arch/powerpc/kernel/kgdb.c |7 --- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 71413f4..a773789 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -332,7 +332,17 @@ bool prep_irq_for_idle(void) return true; } -#endif /* CONFIG_PPC64 */ +notrace unsigned int check_irq_replay(void) +{ + return __check_irq_replay(); +} +#else /* CONFIG_PPC64 */ +notrace unsigned int check_irq_replay(void) +{ + return 0; +} +#endif /* !CONFIG_PPC64 */ +EXPORT_SYMBOL(check_irq_replay); int arch_show_interrupts(struct seq_file *p, int prec) { diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index c470a40..c4af341 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -151,6 +151,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) return 1; } +extern notrace unsigned int check_irq_replay(void); static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; @@ -181,9 +182,9 @@ static int kgdb_singlestep(struct pt_regs *regs) kgdb_handle_exception(0, SIGTRAP, 0, regs); - if (thread_info != exception_thread_info) - /* Restore current_thread_info lastly. */ - memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info); + if ((thread_info != exception_thread_info) (!check_irq_replay())) + /* Restore current_thread_info lastly only if we don't replay interrupt. */ + memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info); return 1; } ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/2] PPC_BOOK3E/KGDB: support kgdb kernel space
On 10/17/2012 08:07 PM, Andreas Schwab wrote: Tiejun Chen tiejun.c...@windriver.com writes: diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 4684e33..ed5862d 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -539,11 +539,15 @@ kernel_dbg_exc: rfdi /* Normal debug exception */ +#ifndef CONFIG_KGDB /* XXX We only handle coming from userspace for now since we can't * quite save properly an interrupted kernel state yet */ 1:andi. r14,r11,MSR_PR; /* check for userspace again */ beq kernel_dbg_exc; /* if from kernel mode */ +#else +1: +#endif You could move the label before the conditional. Okay. diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 4684e33..73ce1a7 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -538,12 +538,15 @@ kernel_dbg_exc: mfspr r13,SPRN_SPRG_DBG_SCRATCH rfdi +1: /* Normal debug exception */ +#ifndef CONFIG_KGDB /* XXX We only handle coming from userspace for now since we can't * quite save properly an interrupted kernel state yet */ -1: andi. r14,r11,MSR_PR; /* check for userspace again */ + andi. r14,r11,MSR_PR; /* check for userspace again */ beq kernel_dbg_exc; /* if from kernel mode */ +#endif /* Now we mash up things to make it look like we are coming on a * normal exception ... Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v5][PATCH 2/3] powerpc/kprobe: complete kprobe and migrate exception frame
On 09/18/2012 01:09 PM, Benjamin Herrenschmidt wrote: On Tue, 2012-09-18 at 15:05 +1000, Benjamin Herrenschmidt wrote: On Mon, 2012-09-17 at 17:54 +0800, Tiejun Chen wrote: -#ifdef CONFIG_PREEMPT b restore /* N.B. the only way to get here is from the beq following ret_from_except. */ resume_kernel: - /* check current_thread_info-preempt_count */ + /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ CURRENT_THREAD_INFO(r9, r1) + lwz r8,TI_FLAGS(r9) + andis. r8,r8,_TIF_EMULATE_STACK_STORE@h + beq+1f + + addir8,r1,INT_FRAME_SIZE/* Get the kprobed function entry */ + + lwz r3,GPR1(r1) + subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception frame */ + mr r4,r1 /* src: current exception frame */ + li r5,INT_FRAME_SIZE /* size: INT_FRAME_SIZE */ + li r6,0/* start offset: 0 */ + mr r1,r3 /* Reroute the trampoline frame to r1 */ + + /* Copy from the original to the trampoline. */ + li r6,0 You just did that li r6,0 2 lines above :-) I'll fix it up manually while applying. In fact the srwi can be dropped completely, we can just load r5 with the divided value. Committed, will push later today, please test. I retest to kprobe do_fork() and show_interrupts() with/without enabling CONFIG_PREEMPT, separately, looks still work. For 32-bit: + /* Copy from the original to the trampoline. */ + lwz r3,GPR1(r1) + subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception frame */ + mr r4,r1 /* src: current exception frame */ + li r5,INT_FRAME_SIZE/4 /* size: INT_FRAME_SIZE */ + li r6,0/* start offset: 0 */ + mr r1,r3 /* Reroute the trampoline frame to r1 */ + mtctr r5 +2: lwzxr0,r6,r4 + stwxr0,r6,r3 + addir6,r6,4 + bdnz2b And for 64-bit: --- + /* Copy from the original to the trampoline. */ + lwz r3,GPR1(r1) + subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception frame */ + mr r4,r1 /* src: current exception frame */ + li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */ + li r6,0/* start offset: 0 */ + mr r1,r3 /* Reroute the trampoline frame to r1 */ + mtctr r5 +2: ldx r0,r6,r4 + stdxr0,r6,r3 + addir6,r6,8 + bdnz2b Thanks Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v5][PATCH 2/3] powerpc/kprobe: complete kprobe and migrate exception frame
On 09/17/2012 06:02 PM, David Laight wrote: /* N.B. the only way to get here is from the beq following ret_from_except. */ resume_kernel: - /* check current_thread_info-preempt_count */ + /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ CURRENT_THREAD_INFO(r9, r1) + lwz r8,TI_FLAGS(r9) + andis. r8,r8,_TIF_EMULATE_STACK_STORE@h + beq+1f ... +1: Does this add a statically mispredicted branch to every return to userspace ? Return usersapce? No, this is just follow 'resume_kernel'. Note I add this 'unlikely' here since I assume often Kprobe is always disabled by default and especially its also rare to kprobe 'stwu' in many kprobe cases. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 2/3] ppc/kprobe: complete kprobe and migrate exception frame
On 09/12/2012 06:38 PM, Benjamin Herrenschmidt wrote: On Wed, 2012-09-12 at 16:55 +0800, tiejun.chen wrote: to worry about nor stack frame to create etc... If you don't like this v4, let me know and then I can go back memcpy for next version. Just open code the whole copy. It should be easy really. As I said, you have the src and dst already in registers and you know they are aligned, so just put the size of the frame in a register (divided by 4), do an mtctr and do a little load_update/store_update loop to do the copy, all in the asm. Is the following Okay? --- arch/powerpc/kernel/entry_32.S | 55 +++- arch/powerpc/kernel/entry_64.S | 45 2 files changed, 94 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ead5016..3b56bba 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -32,6 +32,7 @@ #include asm/unistd.h #include asm/ftrace.h #include asm/ptrace.h +#include asm/cache.h #undef SHOW_SYSCALLS #undef SHOW_SYSCALLS_TASK @@ -831,19 +832,63 @@ restore_user: bnel- load_dbcr0 #endif -#ifdef CONFIG_PREEMPT b restore /* N.B. the only way to get here is from the beq following ret_from_except. */ resume_kernel: - /* check current_thread_info-preempt_count */ + /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ CURRENT_THREAD_INFO(r9, r1) + lwz r8,TI_FLAGS(r9) + andis. r8,r8,_TIF_EMULATE_STACK_STORE@h + beq+1f + + addir8,r1,INT_FRAME_SIZE/* Get the kprobed function entry */ + + lwz r3,GPR1(r1) + subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception frame */ + mr r4,r1 /* src: current exception frame */ + li r5,INT_FRAME_SIZE /* size: INT_FRAME_SIZE */ + li r6,0/* start offset: 0 */ + mr r1,r3 /* Reroute the trampoline frame to r1 */ + + /* Copy from the original to the trampoline. */ + addir5,r5,-4 + addir6,r6,-4 +4: li r0,L1_CACHE_BYTES/4 + mtctr r0 +3: addir6,r6,4 /* copy a cache line */ + lwzxr0,r6,r4 + stwxr0,r6,r3 + bdnz3b + dcbst r6,r3 /* write it to memory */ + sync + cmplw 0,r6,r5 + blt 4b + + /* Do real store operation to complete stwu */ + lwz r5,GPR1(r1) + stw r8,0(r5) + + /* Clear _TIF_EMULATE_STACK_STORE flag */ + lis r11,_TIF_EMULATE_STACK_STORE@h + addir5,r9,TI_FLAGS +0: lwarx r8,0,r5 + andcr8,r8,r11 +#ifdef CONFIG_IBM405_ERR77 + dcbt0,r5 +#endif + stwcx. r8,0,r5 + bne-0b +1: + +#ifdef CONFIG_PREEMPT + /* check current_thread_info-preempt_count */ lwz r0,TI_PREEMPT(r9) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore - lwz r0,TI_FLAGS(r9) - andi. r0,r0,_TIF_NEED_RESCHED + andi. r8,r8,_TIF_NEED_RESCHED beq+restore + lwz r3,_MSR(r1) andi. r0,r3,MSR_EE/* interrupts off? */ beq restore /* don't schedule if so */ #ifdef CONFIG_TRACE_IRQFLAGS @@ -864,8 +909,6 @@ resume_kernel: */ bl trace_hardirqs_on #endif -#else -resume_kernel: #endif /* CONFIG_PREEMPT */ /* interrupts are hard-disabled at this point */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index b40e0b4..cc43b64 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -593,6 +593,51 @@ _GLOBAL(ret_from_except_lite) b .ret_from_except resume_kernel: + /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ + CURRENT_THREAD_INFO(r9, r1) + ld r8,TI_FLAGS(r9) + andis. r8,r8,_TIF_EMULATE_STACK_STORE@h + beq+1f + + addir8,r1,INT_FRAME_SIZE/* Get the kprobed function entry */ + + lwz r3,GPR1(r1) + subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception frame */ + mr r4,r1 /* src: current exception frame */ + li r5,INT_FRAME_SIZE /* size: INT_FRAME_SIZE */ + li r6,0/* start offset: 0 */ + mr r1,r3 /* Reroute the trampoline frame to r1 */ + + /* Copy from the original to the trampoline. */ + addir5,r5,-8 + addir6,r6,-8 +4: li r0,8 + mtctr r0 +3: addir6,r6,8 /* copy a cache line*/ + ldx r0,r6,r4 + stdxr0,r6,r3 + bdnz3b + dcbst r6,r3 /* write it to memory */ + sync + cmpld 0,r6,r5
Re: [v3][PATCH 2/3] ppc/kprobe: complete kprobe and migrate exception frame
On 09/11/2012 01:51 PM, Benjamin Herrenschmidt wrote: On Tue, 2012-09-11 at 10:20 +0800, Tiejun Chen wrote: We can't emulate stwu since that may corrupt current exception stack. So we will have to do real store operation in the exception return code. Firstly we'll allocate a trampoline exception frame below the kprobed function stack and copy the current exception frame to the trampoline. Then we can do this real store operation to implement 'stwu', and reroute the trampoline frame to r1 to complete this exception migration. Ok, so not quite there yet :-) See below: Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/entry_32.S | 45 ++-- arch/powerpc/kernel/entry_64.S | 32 2 files changed, 71 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index ead5016..6cfe12f 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -831,19 +831,54 @@ restore_user: bnel- load_dbcr0 #endif -#ifdef CONFIG_PREEMPT b restore /* N.B. the only way to get here is from the beq following ret_from_except. */ resume_kernel: + /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ + CURRENT_THREAD_INFO(r9, r1) + lwz r0,TI_FLAGS(r9) + andis. r0,r0,_TIF_EMULATE_STACK_STORE@h + beq+1f So you used r0 to load the TI_FLAGS and immediately clobbered it in andis. forcing you to re-load them later down. Instead, put them in r8 lwz r8,TI_FLAGS(r9) andis. r0,r8,_TIF_* beq+* + addir8,r1,INT_FRAME_SIZE/* Get the kprobed function entry */ Then you put your entry in r8 I'll update this for 32b and 64b sections. + lwz r3,GPR1(r1) + subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception frame */ + mr r4,r1 /* src: current exception frame */ + li r5,INT_FRAME_SIZE /* size: INT_FRAME_SIZE */ + mr r1,r3 /* Reroute the trampoline frame to r1 */ + bl memcpy /* Copy from the original to the trampoline */ Which you just clobbered... oops :-) So you need to store that old r1 somewhere fist then retrieve it after the memcpy call. That or open-code the memcpy to avoid all the clobbering problems. Maybe we can use copy_and_flush() since looks copy_and_flush() only clobber r0, r6 and LR explicitly. I'll resync these comments for v4. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 2/3] ppc/kprobe: complete kprobe and migrate exception frame
On 09/12/2012 04:43 PM, Benjamin Herrenschmidt wrote: On Wed, 2012-09-12 at 16:38 +0800, tiejun.chen wrote: So you need to store that old r1 somewhere fist then retrieve it after the memcpy call. That or open-code the memcpy to avoid all the clobbering problems. Maybe we can use copy_and_flush() since looks copy_and_flush() only clobber r0, r6 and LR explicitly. I'll resync these comments for v4. I'd say just open code it. You already have src and dst in registers, the length can easily be put in ctr... easier that way, not clobbering ctr should be easier :) to worry about nor stack frame to create etc... If you don't like this v4, let me know and then I can go back memcpy for next version. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/3] powerpc: Bail out of KGDB when we've been triggered
On 08/22/2012 11:07 PM, Tabi Timur-B04825 wrote: On Wed, Aug 22, 2012 at 5:43 AM, Tiejun Chen tiejun.c...@windriver.com wrote: +int kgdb_skipexception(int exception, struct pt_regs *regs) +{ + if (kgdb_isremovedbreak(regs-nip)) + return 1; + + return 0; +} int kgdb_skipexception(int exception, struct pt_regs *regs) { return !!kgdb_isremovedbreak(regs-nip)); } If the caller only cares about zero vs. non-zero, you can drop the !!. Yes, so 'return kgdb_isremovedbreak(regs-nip);' is already fine and simple. I'll update this as v2 so thanks your comment. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v2][PATCH 3/3] powerpc/kgdb: restore current_thread_info properly
On 08/23/2012 11:14 AM, Nicholas A. Bellinger wrote: On Thu, 2012-08-23 at 10:10 +0800, Tiejun Chen wrote: For powerpc BooKE and e200, singlestep is handled on the critical/dbg exception stack. This causes current_thread_info() to fail for kgdb internal, so previously We work around this issue by copying the thread_info from the kernel stack before calling kgdb_handle_exception, and copying it back afterwards. But actually we don't do this properly. We should backup current_thread_info then restore that when exit. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- v2: fix a typo in patch head description. arch/powerpc/kernel/kgdb.c | 11 +-- 1 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 05adb69..c470a40 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -25,6 +25,7 @@ #include asm/processor.h #include asm/machdep.h #include asm/debug.h +#include linux/slab.h /* * This table contains the mapping between PowerPC hardware trap types, and @@ -153,6 +154,8 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; +struct thread_info *backup_current_thread_info = \ +(struct thread_info *)kmalloc(sizeof(struct thread_info), GFP_KERNEL); Looks like a rouge '\' in the above assignment.. Remove that :) Thanks Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/1] booke/wdt: fix incorrect WDIOC_GETSUPPORT return path
On 08/07/2012 09:19 AM, Tabi Timur-B04825 wrote: On Mon, Aug 6, 2012 at 2:12 PM, Tabi Timur-B04825 b04...@freescale.com wrote: On Mon, Jul 30, 2012 at 3:15 AM, Tiejun Chen tiejun.c...@windriver.com wrote: We miss that correct WDIOC_GETSUPPORT return path when perform copy_to_user() properly. Thanks for catching this. I'm amazed that this driver still has bugs like this. While you're at it, I found a few related bugs. Can you fix these, also? 1.case WDIOC_SETOPTIONS: if (get_user(tmp, p)) return -EINVAL; This should return -EFAULT. 2.case WDIOC_GETBOOTSTATUS: /* XXX: something is clearing TSR */ tmp = mfspr(SPRN_TSR) TSR_WRS(3); /* returns CARDRESET if last reset was caused by the WDT */ return (tmp ? WDIOF_CARDRESET : 0); This should use put_user() to return the value, instead of returning it as a return code. You can title the new patch something like, booke/wdt: some ioctls do not return values properly Will regenerate this patch including these error as v2. Thanks Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/1] booke/wdt: fix incorrect WDIOC_GETSUPPORT return path
On 07/30/2012 04:15 PM, Tiejun Chen wrote: We miss that correct WDIOC_GETSUPPORT return path when perform copy_to_user() properly. Any comments? Thanks Tiejun Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- drivers/watchdog/booke_wdt.c |7 --- 1 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c index 3fe82d0..2be7f29 100644 --- a/drivers/watchdog/booke_wdt.c +++ b/drivers/watchdog/booke_wdt.c @@ -162,12 +162,13 @@ static long booke_wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { u32 tmp = 0; - u32 __user *p = (u32 __user *)arg; + void __user *argp = (u32 __user *)arg; + u32 __user *p = argp; switch (cmd) { case WDIOC_GETSUPPORT: - if (copy_to_user((void *)arg, ident, sizeof(ident))) - return -EFAULT; + return copy_to_user(argp, ident, + sizeof(ident)) ? -EFAULT : 0; case WDIOC_GETSTATUS: return put_user(0, p); case WDIOC_GETBOOTSTATUS: ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/1] powerpc: add memory attribute for mfmsr()
On 07/10/2012 04:19 PM, Benjamin Herrenschmidt wrote: On Tue, 2012-07-10 at 15:59 +0800, Tiejun Chen wrote: Add memory attribute in inline assembly language as a compiler barrier to make sure 4.6.x GCC don't reorder mfmsr(). Out of curiosity, did you see a case where it was re-ordered improperly ? Yes. In my scenario, there's one simple wrapper from local_irq_save(). -- uint32_t XX_DisableAllIntr(void) { unsigned long flags; local_irq_save(flags); return (uint32_t)flags; } When CONFIG_TRACE_IRQFLAGS_SUPPORT is enabled, kernel would expand local_irq_save(). Firstly, please refer to the follows: #define local_irq_save(flags) \ do {\ raw_local_irq_save(flags); \ trace_hardirqs_off(); \ } while (0) #define raw_local_irq_save(flags) \ do {\ typecheck(unsigned long, flags);\ flags = arch_local_irq_save(); \ } while (0) static inline unsigned long arch_local_irq_save(void) { unsigned long flags = arch_local_save_flags(); #ifdef CONFIG_BOOKE asm volatile(wrteei 0 : : : memory); #else SET_MSR_EE(flags ~MSR_EE); #endif return flags; } static inline unsigned long arch_local_save_flags(void) { return mfmsr(); } So local_irq_save(flags) is extended as something like: { #1 flags = mfmsr(); #2 asm volatile(wrteei 0 : : : memory); #3 trace_hardirqs_off(); } But build kernel with our 5.0 toolchain (with/without --with-toolchain-dir=wr-toolchain), (gdb) disassemble XX_DisableAllIntr Dump of assembler code for function XX_DisableAllIntr: 0xc04550c0 +0: mflrr0 0xc04550c4 +4: stw r0,4(r1) 0xc04550c8 +8: bl 0xc000f060 mcount 0xc04550cc +12:stwur1,-16(r1) 0xc04550d0 +16:mflrr0 0xc04550d4 +20:stw r0,20(r1) 0xc04550d8 +24:wrteei 0 0xc04550dc +28:bl 0xc00c6c10 trace_hardirqs_off 0xc04550e0 +32:mfmsr r3 0xc04550e4 +36:lwz r0,20(r1) 0xc04550e8 +40:mtlrr0 0xc04550ec +44:addir1,r1,16 0xc04550f0 +48:blr End of assembler dump. You should notice #2 and #3 is reorganized before #1. This means irq is always disabled before we check irq status via reading msr. Then kernel would work as abnormal sometimes. But with old toolchain (4.5.x) for this same kernel, everything is fine. Tiejun Cheers, Ben. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/reg.h |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 559da19..578e5a0 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1016,7 +1016,8 @@ /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ #define mfmsr() ({unsigned long rval; \ -asm volatile(mfmsr %0 : =r (rval)); rval;}) +asm volatile(mfmsr %0 : =r (rval) : \ +: memory); rval;}) #ifdef CONFIG_PPC_BOOK3S_64 #define __mtmsrd(v, l) asm volatile(mtmsrd %0, __stringify(l) \ : : r (v) : memory) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/1] powerpc: add memory attribute for mfmsr()
On 07/10/2012 04:22 PM, tiejun.chen wrote: On 07/10/2012 04:19 PM, Benjamin Herrenschmidt wrote: On Tue, 2012-07-10 at 15:59 +0800, Tiejun Chen wrote: Add memory attribute in inline assembly language as a compiler barrier to make sure 4.6.x GCC don't reorder mfmsr(). Out of curiosity, did you see a case where it was re-ordered improperly ? Yes. In my scenario, there's one simple wrapper from local_irq_save(). -- uint32_t XX_DisableAllIntr(void) { unsigned long flags; local_irq_save(flags); return (uint32_t)flags; } When CONFIG_TRACE_IRQFLAGS_SUPPORT is enabled, kernel would expand local_irq_save(). Firstly, please refer to the follows: #define local_irq_save(flags) \ do {\ raw_local_irq_save(flags); \ trace_hardirqs_off(); \ } while (0) #define raw_local_irq_save(flags) \ do {\ typecheck(unsigned long, flags);\ flags = arch_local_irq_save(); \ } while (0) static inline unsigned long arch_local_irq_save(void) { unsigned long flags = arch_local_save_flags(); #ifdef CONFIG_BOOKE asm volatile(wrteei 0 : : : memory); #else SET_MSR_EE(flags ~MSR_EE); #endif return flags; } static inline unsigned long arch_local_save_flags(void) { return mfmsr(); } So local_irq_save(flags) is extended as something like: { #1 flags = mfmsr(); #2 asm volatile(wrteei 0 : : : memory); #3 trace_hardirqs_off(); } But build kernel with our 5.0 toolchain (with/without Note here this toolchain is based on 4.6.3. Tiejun --with-toolchain-dir=wr-toolchain), (gdb) disassemble XX_DisableAllIntr Dump of assembler code for function XX_DisableAllIntr: 0xc04550c0 +0: mflrr0 0xc04550c4 +4: stw r0,4(r1) 0xc04550c8 +8: bl 0xc000f060 mcount 0xc04550cc +12: stwur1,-16(r1) 0xc04550d0 +16: mflrr0 0xc04550d4 +20: stw r0,20(r1) 0xc04550d8 +24: wrteei 0 0xc04550dc +28: bl 0xc00c6c10 trace_hardirqs_off 0xc04550e0 +32: mfmsr r3 0xc04550e4 +36: lwz r0,20(r1) 0xc04550e8 +40: mtlrr0 0xc04550ec +44: addir1,r1,16 0xc04550f0 +48: blr End of assembler dump. You should notice #2 and #3 is reorganized before #1. This means irq is always disabled before we check irq status via reading msr. Then kernel would work as abnormal sometimes. But with old toolchain (4.5.x) for this same kernel, everything is fine. Tiejun Cheers, Ben. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/reg.h |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 559da19..578e5a0 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1016,7 +1016,8 @@ /* Macros for setting and retrieving special purpose registers */ #ifndef __ASSEMBLY__ #define mfmsr()({unsigned long rval; \ - asm volatile(mfmsr %0 : =r (rval)); rval;}) + asm volatile(mfmsr %0 : =r (rval) : \ + : memory); rval;}) #ifdef CONFIG_PPC_BOOK3S_64 #define __mtmsrd(v, l) asm volatile(mtmsrd %0, __stringify(l) \ : : r (v) : memory) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3 PATCH 3/3] ppc32/kprobe: don't emulate store when kprobe stwu r1
On 06/03/2012 01:07 PM, Tiejun Chen wrote: We don't do the real store operation for kprobing 'stwu Rx,(y)R1' since this may corrupt the exception frame, now we will do this operation safely in exception return code after migrate current exception frame below the kprobed function stack. So we only update gpr[1] here and trigger a thread flag to mask this. Note we should make sure if we trigger kernel stack over flow. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/lib/sstep.c | 37 +++-- 1 files changed, 35 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 9a52349..a4ce463 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -566,7 +566,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) unsigned long int ea; unsigned int cr, mb, me, sh; int err; - unsigned long old_ra; + unsigned long old_ra, val3, r1; long ival; opcode = instr 26; @@ -1486,11 +1486,44 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case 36:/* stw */ - case 37:/* stwu */ val = regs-gpr[rd]; err = write_mem(val, dform_ea(instr, regs), 4, regs); goto ldst_done; + case 37:/* stwu */ + __asm__ __volatile__(mr %0,1 : =r (r1) :); I'll remove this line, please see below. + + val = regs-gpr[rd]; + val3 = dform_ea(instr, regs); + /* + * For PPC32 we always use stwu to change stack point with r1. So + * this emulated store may corrupt the exception frame, now we + * have to provide the exception frame trampoline, which is pushed + * below the kprobed function stack. So we only update gpr[1] but + * don't emulate the real store operation. We will do real store + * operation safely in exception return code by checking this flag. + */ + if ((ra == 1) !(regs-msr MSR_PR) (val3 = r1)) { + /* + * Check if we will touch kernel sack overflow + */ + if (r1 - STACK_INT_FRAME_SIZE = current-thread.ksp_limit) { OOPS. This line should be: if (val3 - STACK_INT_FRAME_SIZE = current-thread.ksp_limit) { Tiejun + printk(KERN_CRIT Can't kprobe this since Kernel stack overflow.\n); + err = -EINVAL; + break; + } + + /* + * Check if we already set since that means we'll + * lose the previous value. + */ + WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); + set_thread_flag(TIF_EMULATE_STACK_STORE); + err = 0; + } else + err = write_mem(val, val3, 4, regs); + goto ldst_done; + case 38:/* stb */ case 39:/* stbu */ val = regs-gpr[rd]; ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3 PATCH 3/3] ppc32/kprobe: don't emulate store when kprobe stwu r1
On 06/03/2012 02:59 PM, tiejun.chen wrote: On 06/03/2012 01:07 PM, Tiejun Chen wrote: We don't do the real store operation for kprobing 'stwu Rx,(y)R1' since this may corrupt the exception frame, now we will do this operation safely in exception return code after migrate current exception frame below the kprobed function stack. So we only update gpr[1] here and trigger a thread flag to mask this. Note we should make sure if we trigger kernel stack over flow. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/lib/sstep.c | 37 +++-- 1 files changed, 35 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 9a52349..a4ce463 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -566,7 +566,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) unsigned long int ea; unsigned int cr, mb, me, sh; int err; -unsigned long old_ra; +unsigned long old_ra, val3, r1; long ival; opcode = instr 26; @@ -1486,11 +1486,44 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case 36:/* stw */ -case 37:/* stwu */ val = regs-gpr[rd]; err = write_mem(val, dform_ea(instr, regs), 4, regs); goto ldst_done; +case 37:/* stwu */ +__asm__ __volatile__(mr %0,1 : =r (r1) :); I'll remove this line, please see below. + +val = regs-gpr[rd]; +val3 = dform_ea(instr, regs); +/* + * For PPC32 we always use stwu to change stack point with r1. So + * this emulated store may corrupt the exception frame, now we + * have to provide the exception frame trampoline, which is pushed + * below the kprobed function stack. So we only update gpr[1] but + * don't emulate the real store operation. We will do real store + * operation safely in exception return code by checking this flag. + */ +if ((ra == 1) !(regs-msr MSR_PR) (val3 = r1)) { And I also should change (val3 = r1) to (val3 = (regs-r1 - STACK_INT_FRAME_SIZE)) since its worth doing this only we'll really overwrite this exception stack. Tiejun +/* + * Check if we will touch kernel sack overflow + */ +if (r1 - STACK_INT_FRAME_SIZE = current-thread.ksp_limit) { OOPS. This line should be: if (val3 - STACK_INT_FRAME_SIZE = current-thread.ksp_limit) { Tiejun +printk(KERN_CRIT Can't kprobe this since Kernel stack overflow.\n); +err = -EINVAL; +break; +} + +/* + * Check if we already set since that means we'll + * lose the previous value. + */ +WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); +set_thread_flag(TIF_EMULATE_STACK_STORE); +err = 0; +} else +err = write_mem(val, val3, 4, regs); +goto ldst_done; + case 38:/* stb */ case 39:/* stbu */ val = regs-gpr[rd]; ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 2/3] ppc32/kprobe: complete kprobe and migrate exception frame
On 05/10/2012 11:50 AM, Benjamin Herrenschmidt wrote: On Thu, 2011-12-15 at 19:00 +0800, Tiejun Chen wrote: We can't emulate stwu since that may corrupt current exception stack. So we will have to do real store operation in the exception return code. Firstly we'll allocate a trampoline exception frame below the kprobed function stack and copy the current exception frame to the trampoline. Then we can do this real store operation to implement 'stwu', and reroute the trampoline frame to r1 to complete this exception migration. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/entry_32.S | 35 +++ 1 files changed, 35 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 56212bc..0cdd27d 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -850,6 +850,41 @@ resume_kernel: /* interrupts are hard-disabled at this point */ restore: +lwz r3,_MSR(r1) /* Returning to user mode? */ +andi. r0,r3,MSR_PR +bne 1f .../... Sorry for this delay response since I can't take time to do this last week :( Wouldn't it be better to use resume_kernel here ? Agreed :) IE. We already have restore_user vs. resume_kernel labels, including the PR test. In the !PREEMPT case, resume_kernel is empty but it's there, so you can just populate it, ie, something like: restore_user: ... existing dbcr0 stuff ... b restore resume_kernel: -- removed the ifdef CONFIG_PREEMPT ... Do the stack store business here... #ifdef CONFIG_PREEMPT ... move the preempt code here... #endif restore: ... existing stuff ... Also, the added advantage is that the code to calc the thread info pointer and load the TI_FLAG can be shared now between your stuff and preempt, ie: resume_kernel: rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r0,TI_FLAGS(r9) andis. r0,r0,_TIF_EMULATE_STACK_STORE@h bne-emulate_stack_store #ifdef CONFIG_PREEMPT lwz r8,TI_PREEMPT(r9) -- note use of r8 instead of r0, I -think- r8 can be clobbered here but pls dbl check Its should be safe. cmpwi 0,r8,0 bne restore andi. r0,r0,_TIF_NEED_RESCHED etc... Please check if next, v3, is fine. +/* check current_thread_info, _TIF_EMULATE_STACK_STORE */ +rlwinm r9,r1,0,0,(31-THREAD_SHIFT) +lwz r0,TI_FLAGS(r9) +andis. r0,r0,_TIF_EMULATE_STACK_STORE@h +beq+1f + +addir9,r1,INT_FRAME_SIZE/* Get the kprobed function entry */ + +lwz r3,GPR1(r1) +subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception frame */ +mr r4,r1 /* src: current exception frame */ +li r5,INT_FRAME_SIZE /* size: INT_FRAME_SIZE */ +mr r1,r3 /* Reroute the trampoline frame to r1 */ +bl memcpy /* Copy from the original to the trampoline */ + +/* Do real store operation to complete stwu */ +lwz r5,GPR1(r1) +stw r9,0(r5) Ok, I think I -finally- understand your trick of using r1 + INT_FRAME_SIZE as the value to store :-) It makes sense and it's actually a nice hack :-) I would recommend that in the C code part of the emulation, you add some sanity checking to make sure we don't overflow the kernel stack etc... it should come in generally handy especially if what's your trying to debug with kprobes is a kernel stack overflow :-) Added. +/* Clear _TIF_EMULATE_STACK_STORE flag */ +rlwinm r9,r1,0,0,(31-THREAD_SHIFT) +lis r11,_TIF_EMULATE_STACK_STORE@h +addir9,r9,TI_FLAGS +0: lwarx r8,0,r9 +andcr8,r8,r11 +#ifdef CONFIG_IBM405_ERR77 +dcbt0,r9 +#endif +stwcx. r8,0,r9 +bne-0b +1: + #ifdef CONFIG_44x BEGIN_MMU_FTR_SECTION b 1f BTW. Are you going to do a ppc64 variant of that patch ? I'd like to go ppc64 ASAP once we did on ppc32 is good enough :) Tiejun Cheers, Ben. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3 PATCH 0/3] ppc32/kprobe: Fix a bug for kprobe stwu r1
On 06/03/2012 01:07 PM, Tiejun Chen wrote: Changes from V2: * populate those existed codes to reorganize codes * add check if we'll trigger kernel stack over flow BTW, I always validate this on mpc8536ds(UP)/mpc8572ds(SMP) with/without CONFIG_PREEMPT. Tiejun Changes from V1: * use memcpy simply to withdraw copy_exc_stack * add !(regs-msr MSR_PR)) and WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); to make sure we're in goot path. * move this migration process inside 'restore' * clear TIF flag atomically Tiejun Chen (3): powerpc/kprobe: introduce a new thread flag ppc32/kprobe: complete kprobe and migrate exception frame ppc32/kprobe: don't emulate store when kprobe stwu r1 arch/powerpc/include/asm/thread_info.h |3 ++ arch/powerpc/kernel/entry_32.S | 43 ++- arch/powerpc/lib/sstep.c | 37 ++- 3 files changed, 74 insertions(+), 9 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: How to make a memory region to be cache disabled ? Cpu is mpc82XX or mpc83XX.
hellohello wrote: How to make a memory region to be cache disabled in linux? I'm porting mpc83XX from vxworks to linux. Now meet a question: Cpu use a memory region (GPCM mode of bus) to access peripherals, we need this memory to be cache disabled. Linux Kernel is 2.6.25. Thanks very much for any hint! ioremap() is fine enough since it always set I|G as TLB entry attribute. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 13/38] KVM: PPC: booke: category E.HV (GS-mode) support
+/* + * Host interrupt handlers may have clobbered these guest-readable + * SPRGs, so we need to reload them here with the guest's values. + */ +lwz r3, VCPU_VRSAVE(r4) +lwz r5, VCPU_SHARED_SPRG4(r11) +mtspr SPRN_VRSAVE, r3 +lwz r6, VCPU_SHARED_SPRG5(r11) +mtspr SPRN_SPRG4W, r5 +lwz r7, VCPU_SHARED_SPRG6(r11) +mtspr SPRN_SPRG5W, r6 +lwz r8, VCPU_SHARED_SPRG7(r11) +mtspr SPRN_SPRG6W, r7 +mtspr SPRN_SPRG7W, r8 + That should be here. +/* Load some guest volatiles. */ +PPC_LL r3, VCPU_LR(r4) +PPC_LL r5, VCPU_XER(r4) +PPC_LL r6, VCPU_CTR(r4) +PPC_LL r7, VCPU_CR(r4) +PPC_LL r8, VCPU_PC(r4) +#ifndef CONFIG_64BIT +lwz r9, (VCPU_SHARED_MSR + 4)(r11) +#else +ld r9, (VCPU_SHARED_MSR)(r11) +#endif +PPC_LL r0, VCPU_GPR(r0)(r4) +PPC_LL r1, VCPU_GPR(r1)(r4) +PPC_LL r2, VCPU_GPR(r2)(r4) +PPC_LL r10, VCPU_GPR(r10)(r4) +PPC_LL r11, VCPU_GPR(r11)(r4) +PPC_LL r12, VCPU_GPR(r12)(r4) +PPC_LL r13, VCPU_GPR(r13)(r4) +mtlrr3 +mtxer r5 +mtctr r6 +mtcrr7 +mtsrr0 r8 +mtsrr1 r9 + +#ifdef CONFIG_KVM_EXIT_TIMING +/* save enter time */ +1: +mfspr r6, SPRN_TBRU +mfspr r7, SPRN_TBRL +mfspr r8, SPRN_TBRU +cmpwr8, r6 Is not we should save guest CR after this otherwise this can corrupt it? I think this should be a typo since in our previous kvm implementation, we always did collect kvm exit timing at the above location :) Tiejun Thanks -Bharat +PPC_STL r7, VCPU_TIMING_LAST_ENTER_TBL(r4) +bne 1b +PPC_STL r8, VCPU_TIMING_LAST_ENTER_TBU(r4) +#endif + +/* Finish loading guest volatiles and jump to guest. */ +PPC_LL r5, VCPU_GPR(r5)(r4) +PPC_LL r6, VCPU_GPR(r6)(r4) +PPC_LL r7, VCPU_GPR(r7)(r4) +PPC_LL r8, VCPU_GPR(r8)(r4) +PPC_LL r9, VCPU_GPR(r9)(r4) + +PPC_LL r3, VCPU_GPR(r3)(r4) +PPC_LL r4, VCPU_GPR(r4)(r4) +rfi ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v5 3/4] KVM: PPC: epapr: install ev_idle hcall for e500 guest
Liu Yu wrote: If the guest hypervisor node contains has-idle property. Signed-off-by: Liu Yu yu@freescale.com --- v5: no change arch/powerpc/kernel/epapr_hcalls.S | 29 + arch/powerpc/kernel/epapr_paravirt.c | 11 ++- 2 files changed, 39 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 697b390..72fa234 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -15,6 +15,35 @@ #include asm/ppc_asm.h #include asm/asm-offsets.h +#define HC_VENDOR_EPAPR (1 16) +#define HC_EV_IDLE 16 Why not use 'EV_IDLE' directly? + +_GLOBAL(epapr_ev_idle) +epapr_ev_idle: + rlwinm r3,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */ + ori r4,r4,_TLF_NAPPING /* so when we take an exception */ + stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */ + + wrteei 1 + +idle_loop: + LOAD_REG_IMMEDIATE(r11, HC_VENDOR_EPAPR | HC_EV_IDLE) And could this line be simplified as something like this: LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE)) If so, even we can remove the previous HC_VENDOR_EPAPR definition as well. Tiejun + +.global epapr_ev_idle_start +epapr_ev_idle_start: + li r3, -1 + nop + nop + nop + + /* + * Guard against spurious wakeups from a hypervisor -- + * only interrupt will cause us to return to LR due to + * _TLF_NAPPING. + */ + b idle_loop + /* Hypercall entry point. Will be patched with device tree instructions. */ .global epapr_hypercall_start epapr_hypercall_start: diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index e601da7..43d875e 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -21,6 +21,10 @@ #include asm/epapr_hcalls.h #include asm/cacheflush.h #include asm/code-patching.h +#include asm/machdep.h + +extern void epapr_ev_idle(void); +extern u32 epapr_ev_idle_start[]; bool epapr_para_enabled = false; @@ -39,8 +43,13 @@ static int __init epapr_para_init(void) insts = of_get_property(hyper_node, hcall-instructions, len); if (insts !(len % 4) len = (4 * 4)) { - for (i = 0; i (len / 4); i++) + for (i = 0; i (len / 4); i++) { patch_instruction(epapr_hypercall_start + i, insts[i]); + patch_instruction(epapr_ev_idle_start + i, insts[i]); + } + + if (of_get_property(hyper_node, has-idle, NULL)) + ppc_md.power_save = epapr_ev_idle; epapr_para_enabled = true; } else { ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v5 3/4] KVM: PPC: epapr: install ev_idle hcall for e500 guest
Liu Yu-B13201 wrote: -Original Message- From: tiejun.chen [mailto:tiejun.c...@windriver.com] Sent: Tuesday, February 21, 2012 6:54 PM To: Liu Yu-B13201 Cc: ag...@suse.de; kvm-...@vger.kernel.org; k...@vger.kernel.org; linuxppc-...@ozlabs.org; Wood Scott-B07421 Subject: Re: [PATCH v5 3/4] KVM: PPC: epapr: install ev_idle hcall for e500 guest Liu Yu wrote: If the guest hypervisor node contains has-idle property. Signed-off-by: Liu Yu yu@freescale.com --- v5: no change arch/powerpc/kernel/epapr_hcalls.S | 29 + arch/powerpc/kernel/epapr_paravirt.c | 11 ++- 2 files changed, 39 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 697b390..72fa234 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -15,6 +15,35 @@ #include asm/ppc_asm.h #include asm/asm-offsets.h +#define HC_VENDOR_EPAPR(1 16) +#define HC_EV_IDLE 16 Why not use 'EV_IDLE' directly? + +_GLOBAL(epapr_ev_idle) +epapr_ev_idle: + rlwinm r3,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */ + ori r4,r4,_TLF_NAPPING /* so when we take an exception */ + stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */ + + wrteei 1 + +idle_loop: + LOAD_REG_IMMEDIATE(r11, HC_VENDOR_EPAPR | HC_EV_IDLE) And could this line be simplified as something like this: LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE)) If so, even we can remove the previous HC_VENDOR_EPAPR definition as well. Because the epapr_hcalls.h contains C functions, so it cannot be included by assembly code. These common definitions are already covered in epapr_hcalls.h, but looks you redefine the same items many times, in kvm_para.h/epapr_hcalls.S. And I think maybe we'll also reuse these generics elsewhere lately. So can we limit that with __ASSEMBLY__ in epapr_hcalls.h? Or other way. If so it makes our life easy in the future. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Problem in getting shared memory access on P1022RDK
Arshad, Farrukh wrote: I have dumped TLB entries while mapping shared memory. On both cores M-Bit (MAS2[61]) is set in TLB0 entries. On both cores M-Bit is set for all valid TLB1 entries. TLB1 does contains some invalid entries which has M-Bit cleared. So I believe at this time the coherency is not the issue. Any further thoughts on the issue ? Did you check all associated TLBx VPN/RPN and attribute setting are same between two scenarios: W - Core0 R - Core1 and W - Core1 R - Core0? Can you send me your TLB dump log separately if possible? And did you try this flag 'O_SYNC'? Tiejun I have modified dump_tlb_book3e function (found in arch/powerpc/xmon/xmon.c) function for BookE MMU to dump TLB entries. Regards, Farrukh Arshad -Original Message- From: tiejun.chen [mailto:tiejun.c...@windriver.com] Sent: Thursday, January 12, 2012 1:09 PM To: Arshad, Farrukh Cc: Scott Wood; linuxppc-dev@lists.ozlabs.org Subject: Re: Problem in getting shared memory access on P1022RDK Arshad, Farrukh wrote: Adding more it, I have removed the shared memory kernel driver dependency just to narrow down the problem area and I have written a small piece of code in user space. A writer a reader application which access the shared memory and I got the same behavior as with the shared memory kernel driver. Interestingly, my user space application work fine on P1022DS but not on P1022RDK however both using the same CPU modules. When I write a simple string on shared memory from Core 1 it is read at Core 0 properly When I write a simple string on shared memory from Core 0 it is not read at Core 1. Did you dump TLB entry to check page memory coherence attribute for a shared memory as I mentioned previously? This should be consistent on both sides. With this test now I am sure the problem lies in the kernel itself. Any pointers to look for the troubled area ? My application code is (error checking and other code is omitted) #define SHM_BASE0x1C00 #define SHM_SIZE0x40// 4 MB of Shared Memory #define PAGE_SIZE (4*1024) fd = open(device, O_RDWR); You may need to add with 'O_SYNC'. Tiejun shm = malloc(SHM_SIZE + (PAGE_SIZE - 1)); if ( (unsigned long) shm % PAGE_SIZE) { shm += PAGE_SIZE - ((unsigned long)shm % PAGE_SIZE); } shm = mmap(shm, SHM_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, SHM_BASE); .. .. write some string at shm. My memory partitioning for both systems is Core Base AddressSize Core 0 0x, 0x1000, Core 1 0x1000, 0x0C00, Shared Memory0x1C00, 0x0400, Regards, Farrukh Arshad. Mentor Graphics Pakistan ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Problem in getting shared memory access on P1022RDK
Arshad, Farrukh wrote: Adding more it, I have removed the shared memory kernel driver dependency just to narrow down the problem area and I have written a small piece of code in user space. A writer a reader application which access the shared memory and I got the same behavior as with the shared memory kernel driver. Interestingly, my user space application work fine on P1022DS but not on P1022RDK however both using the same CPU modules. When I write a simple string on shared memory from Core 1 it is read at Core 0 properly When I write a simple string on shared memory from Core 0 it is not read at Core 1. Did you dump TLB entry to check page memory coherence attribute for a shared memory as I mentioned previously? This should be consistent on both sides. With this test now I am sure the problem lies in the kernel itself. Any pointers to look for the troubled area ? My application code is (error checking and other code is omitted) #define SHM_BASE0x1C00 #define SHM_SIZE0x40// 4 MB of Shared Memory #define PAGE_SIZE (4*1024) fd = open(device, O_RDWR); You may need to add with 'O_SYNC'. Tiejun shm = malloc(SHM_SIZE + (PAGE_SIZE - 1)); if ( (unsigned long) shm % PAGE_SIZE) { shm += PAGE_SIZE - ((unsigned long)shm % PAGE_SIZE); } shm = mmap(shm, SHM_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, SHM_BASE); .. .. write some string at shm. My memory partitioning for both systems is Core Base AddressSize Core 00x, 0x1000, Core 10x1000, 0x0C00, Shared Memory 0x1C00, 0x0400, Regards, Farrukh Arshad. Mentor Graphics Pakistan ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 0/3] ppc32/kprobe: Fix a bug for kprobe stwu r1
Tiejun Chen wrote: Changes from V1: * use memcpy simply to withdraw copy_exc_stack * add !(regs-msr MSR_PR)) and WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); to make sure we're in goot path. * move this migration process inside 'restore' * clear TIF flag atomically Ben, Is this series OK? Thanks Tiejun Tiejun Chen (3): powerpc/kprobe: introduce a new thread flag ppc32/kprobe: complete kprobe and migrate exception frame ppc32/kprobe: don't emulate store when kprobe stwu r1 arch/powerpc/include/asm/thread_info.h |3 ++ arch/powerpc/kernel/entry_32.S | 35 arch/powerpc/lib/sstep.c | 25 +- 3 files changed, 61 insertions(+), 2 deletions(-) Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Problem in getting shared memory access on P1022RDK
Arshad, Farrukh wrote: How can I verify if the memory mapped is coherent on both cores. My memory partitioning is given below Core Base AddressSize Core 00x, 0x1000, -- CONFIG_PHYSICAL_START = bootm_low = Base Address Core 10x1000, 0x0C00, -- CONFIG_PHYSICAL_START = bootm_low = Base Address Shared Mem0x1C00, 0x0400, Was the kernel option, CONFIG_SMP, enabled for both two kernels? CONFIG_SMP would affect the memory attribute for cache coherency. Maybe you should make sure if kernel have a appropriate memory attribute by dumping TLB entry. Tiejun Regards, Farrukh Arshad -Original Message- From: Scott Wood [mailto:scottw...@freescale.com] Sent: Tuesday, January 03, 2012 10:10 PM To: Arshad, Farrukh Cc: linuxppc-dev@lists.ozlabs.org Subject: Re: Problem in getting shared memory access on P1022RDK On 01/03/2012 03:42 AM, Arshad, Farrukh wrote: Adding more to it, When I write from Core 1 on the shared memory region it is visible at Core 0 and it can read what I have written from Core 1 but when I write from Core 0 on this shared memory it is not visible on Core 1. Is the memory mapped coherent on both cores? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Kernel not booting when supplying boot parameter mem
Arshad, Farrukh wrote: Greetings All, I have a basic question. I have 512 MB memory. I want my kernel to use only last 128 MB of memory starting from address 0x1000. I have configured the kernel CONFIG_PHYSICAL_START=0x1000 and in kernel boot parameter I have set mem=128M. In this scenario my kernel is not booting and it just stuck after uncompressing it. If I do not provide mem=128M boot parameter my kernel boots fine, but in that case I can not restrict kernel to use only 128M memory. Why supplying mem=128M causing kernel to fail. Any kernel option else is configured, such as CONFIG_KERNEL_START and CONFIG_RELOCATABLE? Or any u-boot argument is reconfigured, such as 'bootm_low', 'bootm_mapsize' and 'bootm_size'. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Kernel not booting when supplying boot parameter mem
Arshad, Farrukh wrote: Hi Tiejun, Thanks for your response. Yes, I am running two kernels one on each core in SAMP configuration on P1022RDK board. Given is my memory partitioning. Core 0 is loading fine but Core 1 is not loading. CONFIG_RELOCATABLE is not set. -- Core | Base Address |Size | Uboot parameters | Kernel Configuration | -|---|-|--|| Core 0 (MEL RT Kernel)| 0x, | 0x1000, - 256 (MB)|bootm_low = 0x,, bootm_size = 0x1000, | CONFIG_PHYSICAL_START = 0x,, CONFIG_KERNEL_START = 0xC000, | Core 1 (LTIB Kernel) |0x1000,| 0x0800, - 128 (MB)|bootm_low = 0x1000,, bootm_size = 0x0800, | CONFIG_PHYSICAL_START = 0x1000,, CONFIG_KERNEL_START = 0xC000, | -- Please check if the following commit is already in your kernel: -- powerpc: Fix memory limits when starting at a non-zero address memblock_enforce_memory_limit() takes the desired maximum quantity of memory to end up with, not an address above which memory will not be used. Tiejun ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/4] ppc32/kprobe: complete kprobe and migrate exception frame
Looks we have to go into 'restore' at last as I said previously. I send v2 based on your all comments. I assume it may not necessary to reorganize ret_from_except for *ppc32* . It might be cleaner but I can do that myself later. I have this version but I'm not 100% sure if its as you expect :) #define _TIF_WORK_MASK (_TIF_USER_WORK_MASK | _TIF_EMULATE_STACK_STORE) == diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 56212bc..e52b586 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -791,41 +791,29 @@ ret_from_except: SYNC/* Some chip revs have problems here... */ MTMSRD(r10) /* disable interrupts */ - lwz r3,_MSR(r1) /* Returning to user mode? */ - andi. r0,r3,MSR_PR - beq resume_kernel - user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()-flags */ rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_USER_WORK_MASK - bne do_work + andi. r0,r9,_TIF_WORK_MASK + beq restore -restore_user: -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) - /* Check whether this process has its own DBCR0 value. The internal - debug mode bit tells us that dbcr0 should be loaded. */ - lwz r0,THREAD+THREAD_DBCR0(r2) - andis. r10,r0,DBCR0_IDM@h - bnel- load_dbcr0 -#endif + lwz r3,_MSR(r1) /* Returning to user mode? */ + andi. r0,r3,MSR_PR + bne do_user_work #ifdef CONFIG_PREEMPT - b restore - /* N.B. the only way to get here is from the beq following ret_from_except. */ -resume_kernel: /* check current_thread_info-preempt_count */ rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r0,TI_PREEMPT(r9) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ - bne restore + bne 2f lwz r0,TI_FLAGS(r9) andi. r0,r0,_TIF_NEED_RESCHED - beq+restore + beq+2f andi. r0,r3,MSR_EE/* interrupts off? */ - beq restore /* don't schedule if so */ + beq 2f /* don't schedule if so */ #ifdef CONFIG_TRACE_IRQFLAGS /* Lockdep thinks irqs are enabled, we need to call * preempt_schedule_irq with IRQs off, so we inform lockdep @@ -844,12 +832,54 @@ resume_kernel: */ bl trace_hardirqs_on #endif -#else -resume_kernel: +2: #endif /* CONFIG_PREEMPT */ + /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + lwz r0,TI_FLAGS(r9) + andis. r0,r0,_TIF_EMULATE_STACK_STORE@h + beq+restore + + addir9,r1,INT_FRAME_SIZE/* Get the kprobed function entry */ + + lwz r3,GPR1(r1) + subir3,r3,INT_FRAME_SIZE/* dst: Allocate a trampoline exception frame */ + mr r4,r1 /* src: current exception frame */ + li r5,INT_FRAME_SIZE /* size: INT_FRAME_SIZE */ + mr r1,r3 /* Reroute the trampoline frame to r1 */ + bl memcpy /* Copy from the original to the trampoline */ + + /* Do real store operation to complete stwu */ + lwz r5,GPR1(r1) + stw r9,0(r5) + + /* Do real store operation to complete stwu */ + lwz r5,GPR1(r1) + stw r9,0(r5) + + /* Clear _TIF_EMULATE_STACK_STORE flag */ + rlwinm r9,r1,0,0,(31-THREAD_SHIFT) + lis r11,_TIF_EMULATE_STACK_STORE@h + addir9,r9,TI_FLAGS +0: lwarx r8,0,r9 + andcr8,r8,r11 +#ifdef CONFIG_IBM405_ERR77 + dcbt0,r9 +#endif + stwcx. r8,0,r9 + bne-0b + /* interrupts are hard-disabled at this point */ restore: +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + lwz r3,_MSR(r1) /* Returning to user mode? */ + andi. r0,r3,MSR_PR + beq 1f + /* Check whether this process has its own DBCR0 value. The internal + debug mode bit tells us that dbcr0 should be loaded. */ + lwz r0,THREAD+THREAD_DBCR0(r2) + andis. r10,r0,DBCR0_IDM@h + bnel- load_dbcr0 +1: +#endif + #ifdef CONFIG_44x BEGIN_MMU_FTR_SECTION b 1f @@ -1159,7 +1189,7 @@ global_dbcr0: .previous #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ -do_work: /* r10 contains MSR_KERNEL here */ +do_user_work: /* r10 contains MSR_KERNEL here */ andi. r0,r9,_TIF_NEED_RESCHED beq do_user_signal @@ -1184,7 +1214,7 @@ recheck: andi. r0,r9,_TIF_NEED_RESCHED bne-do_resched andi. r0,r9,_TIF_USER_WORK_MASK - beq restore_user + beq restore do_user_signal:/* r10 contains MSR_KERNEL here