[PATCH v6 2/4] powerpc/85xx: add hardware automatically enter altivec idle state
From: Wang Dongsheng dongsheng.w...@freescale.com Each core's AltiVec unit may be placed into a power savings mode by turning off power to the unit. Core hardware will automatically power down the AltiVec unit after no AltiVec instructions have executed in N cycles. The AltiVec power-control is triggered by hardware. Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com --- *v3: Assembly code instead of C code. *v2: Remove: delete setup_idle_hw_governor function. delete Fix erratum for rev1. Move: move setup_* into __setup/restore_cpu_e6500. arch/powerpc/kernel/cpu_setup_fsl_booke.S | 21 + 1 file changed, 21 insertions(+) diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index bfb18c7..4789056 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -53,11 +53,31 @@ _GLOBAL(__e500_dcache_setup) isync blr +/* + * FIXME - we haven't yet done testing to determine a reasonable default + * value for AV_WAIT_IDLE_BIT. + */ +#define AV_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */ +_GLOBAL(setup_altivec_idle) + mfspr r3, SPRN_PWRMGTCR0 + + /* Enable Altivec Idle */ + orisr3, r3, PWRMGTCR0_AV_IDLE_PD_EN@h + li r11, AV_WAIT_IDLE_BIT + + /* Set Automatic AltiVec Idle Count */ + rlwimi r3, r11, PWRMGTCR0_AV_IDLE_CNT_SHIFT, PWRMGTCR0_AV_IDLE_CNT + + mtspr SPRN_PWRMGTCR0, r3 + + blr + _GLOBAL(__setup_cpu_e6500) mflrr6 #ifdef CONFIG_PPC64 bl .setup_altivec_ivors #endif + bl setup_altivec_idle bl __setup_cpu_e5500 mtlrr6 blr @@ -119,6 +139,7 @@ _GLOBAL(__setup_cpu_e5500) _GLOBAL(__restore_cpu_e6500) mflrr5 bl .setup_altivec_ivors + bl .setup_altivec_idle bl __restore_cpu_e5500 mtlrr5 blr -- 1.8.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v6 3/4] powerpc/85xx: add hardware automatically enter pw20 state
From: Wang Dongsheng dongsheng.w...@freescale.com Using hardware features make core automatically enter PW20 state. Set a TB count to hardware, the effective count begins when PW10 is entered. When the effective period has expired, the core will proceed from PW10 to PW20 if no exit conditions have occurred during the period. Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com --- *v3: Assembly code instead of C code. *v2: Remove: delete setup_idle_hw_governor function. delete Fix erratum for rev1. Move: move setup_* into __setup/restore_cpu_e6500. arch/powerpc/kernel/cpu_setup_fsl_booke.S | 21 + 1 file changed, 21 insertions(+) diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 4789056..49e738e 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -55,6 +55,25 @@ _GLOBAL(__e500_dcache_setup) /* * FIXME - we haven't yet done testing to determine a reasonable default + * value for PW20_WAIT_IDLE_BIT. + */ +#define PW20_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */ +_GLOBAL(setup_pw20_idle) + mfspr r3, SPRN_PWRMGTCR0 + + /* Set PW20_WAIT bit, enable pw20 state*/ + ori r3, r3, PWRMGTCR0_PW20_WAIT + li r11, PW20_WAIT_IDLE_BIT + + /* Set Automatic PW20 Core Idle Count */ + rlwimi r3, r11, PWRMGTCR0_PW20_ENT_SHIFT, PWRMGTCR0_PW20_ENT + + mtspr SPRN_PWRMGTCR0, r3 + + blr + +/* + * FIXME - we haven't yet done testing to determine a reasonable default * value for AV_WAIT_IDLE_BIT. */ #define AV_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */ @@ -77,6 +96,7 @@ _GLOBAL(__setup_cpu_e6500) #ifdef CONFIG_PPC64 bl .setup_altivec_ivors #endif + bl setup_pw20_idle bl setup_altivec_idle bl __setup_cpu_e5500 mtlrr6 @@ -139,6 +159,7 @@ _GLOBAL(__setup_cpu_e5500) _GLOBAL(__restore_cpu_e6500) mflrr5 bl .setup_altivec_ivors + bl .setup_pw20_idle bl .setup_altivec_idle bl __restore_cpu_e5500 mtlrr5 -- 1.8.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v6 4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle
From: Wang Dongsheng dongsheng.w...@freescale.com Add a sys interface to enable/diable pw20 state or altivec idle, and control the wait entry time. Enable/Disable interface: 0, disable. 1, enable. /sys/devices/system/cpu/cpuX/pw20_state /sys/devices/system/cpu/cpuX/altivec_idle Set wait time interface:(Nanosecond) /sys/devices/system/cpu/cpuX/pw20_wait_time /sys/devices/system/cpu/cpuX/altivec_idle_wait_time Example: Base on TBfreq is 41MHZ. 1~48(ns): TB[63] 49~97(ns): TB[62] 98~195(ns): TB[61] 196~390(ns): TB[60] 391~780(ns): TB[59] 781~1560(ns): TB[58] ... Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com --- *v6: Change show_pw20_wait_time/show_altivec_idle_wait_time functions implementation. *v5: Change get_idle_ticks_bit function implementation. *v4: Move code from 85xx/common.c to kernel/sysfs.c. Remove has_pw20_altivec_idle function. Change wait entry_bit to wait time. diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index b4e6676..6c92e23 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -86,6 +86,304 @@ __setup(smt-snooze-delay=, setup_smt_snooze_delay); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_FSL_SOC +#define MAX_BIT63 + +static u64 pw20_wt; +static u64 altivec_idle_wt; + +static unsigned int get_idle_ticks_bit(u64 ns) +{ + u64 cycle; + + if (ns = 1) + cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec; + else + cycle = div_u64(ns * tb_ticks_per_usec, 1000); + + if (!cycle) + return 0; + + return ilog2(cycle); +} + +static void do_show_pwrmgtcr0(void *val) +{ + u32 *value = val; + + *value = mfspr(SPRN_PWRMGTCR0); +} + +static ssize_t show_pw20_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + unsigned int cpu = dev-id; + + smp_call_function_single(cpu, do_show_pwrmgtcr0, value, 1); + + value = PWRMGTCR0_PW20_WAIT; + + return sprintf(buf, %u\n, value ? 1 : 0); +} + +static void do_store_pw20_state(void *val) +{ + u32 *value = val; + u32 pw20_state; + + pw20_state = mfspr(SPRN_PWRMGTCR0); + + if (*value) + pw20_state |= PWRMGTCR0_PW20_WAIT; + else + pw20_state = ~PWRMGTCR0_PW20_WAIT; + + mtspr(SPRN_PWRMGTCR0, pw20_state); +} + +static ssize_t store_pw20_state(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 value; + unsigned int cpu = dev-id; + + if (kstrtou32(buf, 0, value)) + return -EINVAL; + + if (value 1) + return -EINVAL; + + smp_call_function_single(cpu, do_store_pw20_state, value, 1); + + return count; +} + +static ssize_t show_pw20_wait_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u32 value; + u64 tb_cycle = 1; + u64 time; + + unsigned int cpu = dev-id; + + if (!pw20_wt) { + smp_call_function_single(cpu, do_show_pwrmgtcr0, value, 1); + value = (value PWRMGTCR0_PW20_ENT) + PWRMGTCR0_PW20_ENT_SHIFT; + + tb_cycle = (tb_cycle (MAX_BIT - value + 1)); + /* convert ms to ns */ + if (tb_ticks_per_usec 1000) { + time = div_u64(tb_cycle, tb_ticks_per_usec / 1000); + } else { + u32 rem_us; + + time = div_u64_rem(tb_cycle, tb_ticks_per_usec, + rem_us); + time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec; + } + } else { + time = pw20_wt; + } + + return sprintf(buf, %llu\n, time 0 ? time : 0); +} + +static void set_pw20_wait_entry_bit(void *val) +{ + u32 *value = val; + u32 pw20_idle; + + pw20_idle = mfspr(SPRN_PWRMGTCR0); + + /* Set Automatic PW20 Core Idle Count */ + /* clear count */ + pw20_idle = ~PWRMGTCR0_PW20_ENT; + + /* set count */ + pw20_idle |= ((MAX_BIT - *value) PWRMGTCR0_PW20_ENT_SHIFT); + + mtspr(SPRN_PWRMGTCR0, pw20_idle); +} + +static ssize_t store_pw20_wait_time(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 entry_bit; + u64 value; + + unsigned int cpu = dev-id; + + if (kstrtou64(buf, 0, value)) + return -EINVAL; + + if (!value) + return -EINVAL; + + entry_bit = get_idle_ticks_bit(value); + if (entry_bit MAX_BIT) + return -EINVAL; + + pw20_wt = value; + +
Re: [PATCH] DTS: DMA: Fix DMA3 interrupts
On Wed, Dec 11, 2013 at 2:33 AM, Scott Wood scottw...@freescale.com wrote: On Tue, 2013-12-10 at 18:33 +0800, Hongbo Zhang wrote: Scott, This issue is due to the non-continuous MPIC register, I think there is two ways to fix it. The first one is as what we are discussing, in fact the Bman/Qman DT author had introduced this way, and I had to follow it, it is a trick, adding 208 is a bit ugly I think, and even difficult to explain it to customers etc, but this way changes less codes. The second one is editing MPIC related codes without adding 208 to high interrupts. The point of translate interrupt number to MPIC register address is a so called 'isu' mechanism, we can do like the following example codes, then the tricky adding 208 isn't needed any more. Which one do you prefer? In fact I myself prefer the second, if the idea is acceptable, I will send a patch instead of this one. (and also alone with the internal patch decreasing 208 for the Bman/Qman) void __init corenet_ds_pic_init(void) { .. mpic = mpic_alloc(NULL, 0, flags, 0, 512, OpenPIC); BUG_ON(mpic == NULL); // Add this start for (i = 0; i 17; i++) { if (i 11) addr_off = 0x1 + 0x20 * 16 * i; else addr_off = 0x13000 + 0x20 * 16 * (i - 11); /* scape the address not for interrupts */ mpic_assign_isu(mpic, i, mpic-paddr + addr_off); } // Add this end mpic_init(mpic); } NACK We already have a binding that states that the interrupt number is based on the register offset, rather than whatever arbitrary numbers hardware documenters decide to use next week. While I'm not terribly happy with the usability of this, especially now that it's not a simple add 16, redefining the existing binding is not OK (and in any case the code above seems obfuscatory). If we decide to do something other than continue with register offset divided by 32, then we need to define a new interrupt type (similar to current defined types of error interrupt, timer, and IPI) for the new numberspace -- and it should be handled when decoding that type of interrupt specifier, rather than with the isu mechanism. I had to say that the current binding is terribly confusing. I know a lot of people who were confused while looking into the device tree and I had to help them out now and then. I even got confused for some time at the very beginning. :( If we want to keep the current binding, a big warning message is well deserved at the very beginning of the binding document to clarify that the interrupt number used in device tree has nothing to do with the interrupt number mentioned in the silicon reference manuals. I think we should even mention the add 16 magic rule also to make it more intuitive. Adding a new interrupt type for external interrupts is good to make the interrupt number consistent with the Reference Manuals. But doing that requires changing all the device trees that used the previous binding, and we need a mechanism of judging which binding a device tree is complying to. Hongbo's proposal is not a complete fix for the readability issue. But it makes the add 16 magic rule continue to work, instead of making it even worse by introducing the +208 interrupts. The DMA3 patch is the first time that we add the +208 interrupts in the device trees of mainline kernel. It is a good time now to prevent us making the readability even worse while not breaking things out there. ISU is a standard mechanism in MPIC to deal with sparse configuration register space, and IMO a good fit for this situation. Regards, Leo ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Chip Select register with 8bit bus size...
We have a problem to communicate with a register, CS4, at at 0x1002. In U-boot that reg has the value 0x45fab3c1, but when we try to access it we get: 0x10101010 and we are not able to write too. With CS3 everything seems ok, we can read and write. CS3 is at: 0x1000. The main/only differences between cs3 and cs4 are: Chip Select: Lp_cs3 Bus size: 32 bit Bus control: 2 wait state R/W ACK disabled Allocated size 32Kbyte Chip Select: Lp_cs4 Bus size: 8 bit Bus control: 2 wait state R/W ACK disabled Allocated size: 4 KByte In userspace we use: /**/ //code from memedit.c int fd; fd = open(/dev/mem, O_SYNC | O_RDWR); mem = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset (~4095)); printf(/dev/mem[0x%08x] = 0x%08x, offset, *(unsigned int*)mem[offset 4095]); //to write *((unsigned int *)mem[offset 4095]) = input; /**/ In our kernel module: /**/ #define CS4_START0x1002U #define CS4_STOP 0x1004U #define CS4_SIZE 0x0002U #define CS3_START0x1000U #define CS3_STOP 0x1002U #define CS3_SIZE 0x0002U void __iomem *cs3_ioaddr = ioremap ((volatile unsigned long)(CS3_START), CS3_SIZE); printk(We read value at CS3: %x \n\n\n,in_be32(cs3_ioaddr+0x0018)); out_be32(cs3_ioaddr+0x0018,0x0001); printk(We read written value: %x \n\n\n,in_be32(cs3_ioaddr+0x0018)); /**/ Platform is based on mpc5200b CPU and fpga is a Xilinx Virtex4. Kernel we use: 2.6.33 Thanks again in advance… Lorenzo ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
commit e38c0a1f breaks powerpc boards with uli1575 chip
Hi While trying to make freescale p2020ds and mpc8572ds boards working with mainline kernel, I faced that commit e38c0a1f (Handle #address-cells 2 specially) breaks things with these boards. Both these boards have uli1575 chip. Corresponding part in device tree is something like uli1575@0 { reg = 0x0 0x0 0x0 0x0 0x0; #size-cells = 2; #address-cells = 3; ranges = 0x200 0x0 0x8000 0x200 0x0 0x8000 0x0 0x2000 0x100 0x0 0x0 0x100 0x0 0x0 0x0 0x1; isa@1e { ... I.e. it has #address-cells = 3 With commit e38c0a1f reverted, devices under uli1575 are registered correctly, e.g. for rtc OF: ** translation for device /pcie@ffe09000/pcie@0/uli1575@0/isa@1e/rtc@70 ** OF: bus is isa (na=2, ns=1) on /pcie@ffe09000/pcie@0/uli1575@0/isa@1e OF: translating address: 0001 0070 OF: parent bus is default (na=3, ns=2) on /pcie@ffe09000/pcie@0/uli1575@0 OF: walking ranges... OF: ISA map, cp=0, s=1000, da=70 OF: parent translation for: 0100 OF: with offset: 70 OF: one level translation: 0070 OF: parent bus is pci (na=3, ns=2) on /pcie@ffe09000/pcie@0 OF: walking ranges... OF: default map, cp=a000, s=2000, da=70 OF: default map, cp=0, s=1, da=70 OF: parent translation for: 0100 OF: with offset: 70 OF: one level translation: 0100 0070 OF: parent bus is pci (na=3, ns=2) on /pcie@ffe09000 OF: walking ranges... OF: PCI map, cp=0, s=1, da=70 OF: parent translation for: 0100 OF: with offset: 70 OF: one level translation: 0100 0070 OF: parent bus is default (na=2, ns=2) on / OF: walking ranges... OF: PCI map, cp=0, s=1, da=70 OF: parent translation for: ffc1 OF: with offset: 70 OF: one level translation: ffc10070 OF: reached root node With commit e38c0a1f in place, address translation fails: OF: ** translation for device /pcie@ffe09000/pcie@0/uli1575@0/isa@1e/rtc@70 ** OF: bus is isa (na=2, ns=1) on /pcie@ffe09000/pcie@0/uli1575@0/isa@1e OF: translating address: 0001 0070 OF: parent bus is default (na=3, ns=2) on /pcie@ffe09000/pcie@0/uli1575@0 OF: walking ranges... OF: ISA map, cp=0, s=1000, da=70 OF: parent translation for: 0100 OF: with offset: 70 OF: one level translation: 0070 OF: parent bus is pci (na=3, ns=2) on /pcie@ffe09000/pcie@0 OF: walking ranges... OF: default map, cp=a000, s=2000, da=70 OF: default map, cp=0, s=1, da=70 OF: not found ! Either e38c0a1f should be reverted, or uli1575 (and perhaps other similar devices) have to be described in device trees differently. Could someone please comment on this? ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 00/10] Kconfig: cleanup SERIO_I8042 dependencies
This patch series removes the messy dependencies from SERIO_I8042 by having it depend on one variable (ARCH_MAY_HAVE_PC_SERIO) and having architectures which need it select that variable in arch/*/Kconfig. New architectures are unlikely to need SERIO_I8042, so this avoids having an ever growing list of architectures to exclude. If an architecture without i8042 support isn't excluded through the dependency list for SERIO_I8042 or through explicit disabling in a config, it will likely panic on boot with something similar to this (from arm64): [ 27.426181] [ffc000403b1c] i8042_flush+0x88/0x10c [ 27.426251] [ffc00084cc2c] i8042_init+0x58/0xe8 [ 27.426320] [ffc80bec] do_one_initcall+0xc4/0x14c [ 27.426404] [ffc000820970] kernel_init_freeable+0x1a4/0x244 [ 27.426480] [ffc0005a894c] kernel_init+0x18/0x148 [ 27.426561] Code: d2800c82 f2bf7c02 f2dff7e2 f2e2 (39400042) [ 27.426789] ---[ end trace ac076843cf0f383e ]--- [ 27.426875] Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b This is v2 of the patch series. Changes from version 1: o Added acks. arm, ia64, and sh are only ones without acks. o Moved select of ARCH_MIGHT_HAVE_PC_SERIO to board-specific Kconfigs for arm and sh. A tree with these patches is at: git://github.com/mosalter/linux.git (serio-i8042-v2 branch) Mark Salter (10): alpha: select ARCH_MIGHT_HAVE_PC_SERIO arm: select ARCH_MIGHT_HAVE_PC_SERIO ia64: select ARCH_MIGHT_HAVE_PC_SERIO mips: select ARCH_MIGHT_HAVE_PC_SERIO powerpc: select ARCH_MIGHT_HAVE_PC_SERIO sh: select ARCH_MIGHT_HAVE_PC_SERIO for SH_CAYMAN sparc: select ARCH_MIGHT_HAVE_PC_SERIO unicore32: select ARCH_MIGHT_HAVE_PC_SERIO x86: select ARCH_MIGHT_HAVE_PC_SERIO Kconfig: cleanup SERIO_I8042 dependencies arch/alpha/Kconfig | 1 + arch/arm/mach-footbridge/Kconfig | 1 + arch/ia64/Kconfig| 1 + arch/mips/Kconfig| 1 + arch/powerpc/Kconfig | 1 + arch/sh/boards/Kconfig | 1 + arch/sparc/Kconfig | 1 + arch/unicore32/Kconfig | 1 + arch/x86/Kconfig | 1 + drivers/input/serio/Kconfig | 11 --- 10 files changed, 17 insertions(+), 3 deletions(-) -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 10/10] Kconfig: cleanup SERIO_I8042 dependencies
Remove messy dependencies from SERIO_I8042 by having it depend on one Kconfig symbol (ARCH_MIGHT_HAVE_PC_SERIO) and having architectures which need it select ARCH_MIGHT_HAVE_PC_SERIO in arch/*/Kconfig. New architectures are unlikely to need SERIO_I8042, so this avoids having an ever growing list of architectures to exclude. Signed-off-by: Mark Salter msal...@redhat.com Acked-by: H. Peter Anvin h...@zytor.com Acked-by: Ralf Baechle r...@linux-mips.org Acked-by: Benjamin Herrenschmidt b...@kernel.crashing.org CC: Dmitry Torokhov dmitry.torok...@gmail.com CC: Richard Henderson r...@twiddle.net CC: linux-al...@vger.kernel.org CC: Russell King li...@arm.linux.org.uk CC: linux-arm-ker...@lists.infradead.org CC: Tony Luck tony.l...@intel.com CC: Fenghua Yu fenghua...@intel.com CC: linux-i...@vger.kernel.org CC: linux-m...@linux-mips.org CC: Paul Mackerras pau...@samba.org CC: linuxppc-dev@lists.ozlabs.org CC: Paul Mundt let...@linux-sh.org CC: linux...@vger.kernel.org CC: David S. Miller da...@davemloft.net CC: sparcli...@vger.kernel.org CC: Guan Xuetao g...@mprc.pku.edu.cn CC: Ingo Molnar mi...@redhat.com CC: Thomas Gleixner t...@linutronix.de CC: x...@kernel.org --- drivers/input/serio/Kconfig | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig index 8541f94..1f5cec2 100644 --- a/drivers/input/serio/Kconfig +++ b/drivers/input/serio/Kconfig @@ -16,14 +16,19 @@ config SERIO To compile this driver as a module, choose M here: the module will be called serio. +config ARCH_MIGHT_HAVE_PC_SERIO + bool + help + Select this config option from the architecture Kconfig if + the architecture might use a PC serio device (i8042) to + communicate with keyboard, mouse, etc. + if SERIO config SERIO_I8042 tristate i8042 PC Keyboard controller default y - depends on !PARISC (!ARM || FOOTBRIDGE_HOST) \ - (!SUPERH || SH_CAYMAN) !M68K !BLACKFIN !S390 \ - !ARC + depends on ARCH_MIGHT_HAVE_PC_SERIO help i8042 is the chip over which the standard AT keyboard and PS/2 mouse are connected to the computer. If you use these devices, -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 05/10] powerpc: select ARCH_MIGHT_HAVE_PC_SERIO
Architectures which might use an i8042 for serial IO to keyboard, mouse, etc should select ARCH_MIGHT_HAVE_PC_SERIO. Signed-off-by: Mark Salter msal...@redhat.com Acked-by: Benjamin Herrenschmidt b...@kernel.crashing.org CC: Paul Mackerras pau...@samba.org CC: linuxppc-dev@lists.ozlabs.org --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b44b52c..fb75485 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -86,6 +86,7 @@ config PPC bool default y select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO select BINFMT_ELF select OF select OF_EARLY_FLATTREE -- 1.8.3.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] of/irq: Fix device_node refcount in of_irq_parse_raw()
Commit 2361613206e6, of/irq: Refactor interrupt-map parsing changed the refcount on the device_node causing an error in of_node_put(): ERROR: Bad of_node_put() on /pci@8002000 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.13.0-rc3-dirty #2 Call Trace: [c0003e403500] [c00144fc] .show_stack+0x7c/0x1f0 (unreliable) [c0003e4035d0] [c070f250] .dump_stack+0x88/0xb4 [c0003e403650] [c05e8768] .of_node_release+0xd8/0xf0 [c0003e4036e0] [c05eeafc] .of_irq_parse_one+0x10c/0x280 [c0003e4037a0] [c05efd4c] .of_irq_parse_pci+0x3c/0x1d0 [c0003e403840] [c0038240] .pcibios_setup_device+0xa0/0x2e0 [c0003e403910] [c00398f0] .pcibios_setup_bus_devices+0x60/0xd0 [c0003e403990] [c003b3a4] .__of_scan_bus+0x1a4/0x2b0 [c0003e403a80] [c003a62c] .pcibios_scan_phb+0x30c/0x410 [c0003e403b60] [c09fe430] .pcibios_init+0x7c/0xd4 This patch adjusts the refcount in the walk of the interrupt tree. When a match is found, there is no need to increase the refcount on 'out_irq-np' as 'newpar' is already holding a ref. The refcount balance between 'ipar' and 'newpar' is maintained in the skiplevel: goto label. This patch also removes the usage of the device_node variable 'old' which seems useless after the latest changes. Signed-off-by: Cédric Le Goater c...@fr.ibm.com --- This patch was tested on powerpc, pseries and powernv. This is a new area for me so I might have missed a path. Please take a look. We could now introduce an helper routine to look for #address-cells in of_irq_parse_raw(). This can be the subject of another patch. Thanks, C. drivers/of/irq.c |5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 786b0b47fae4..27212402c532 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -165,7 +165,6 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) if (of_get_property(ipar, interrupt-controller, NULL) != NULL) { pr_debug( - got it !\n); - of_node_put(old); return 0; } @@ -250,8 +249,7 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) * Successfully parsed an interrrupt-map translation; copy new * interrupt specifier into the out_irq structure */ - of_node_put(out_irq-np); - out_irq-np = of_node_get(newpar); + out_irq-np = newpar; match_array = imap - newaddrsize - newintsize; for (i = 0; i newintsize; i++) @@ -268,7 +266,6 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq) } fail: of_node_put(ipar); - of_node_put(out_irq-np); of_node_put(newpar); return -EINVAL; -- 1.7.10.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 4/5] powerpc/fsl-booke: Add initial T208x QDS board support
On Wed, 2013-12-11 at 19:19 +0800, Shengzhou Liu wrote: + boardctrl: board-control@3,0 { + #address-cells = 1; + #size-cells = 1; + compatible = fsl,fpga-qixis; + reg = 3 0 0x300; + ranges = 0 3 0 0x300; + }; Why do you have ranges and #address-cells/#size-cells here? When would there ever be a child node? + }; + + memory { + device_type = memory; + }; + + dcsr: dcsr@f { + ranges = 0x 0xf 0x 0x01072000; + }; + + soc: soc@ffe00 { + ranges = 0x 0xf 0xfe00 0x100; + reg = 0xf 0xfe00 0 0x1000; + spi@11 { + flash@0 { + #address-cells = 1; + #size-cells = 1; + compatible = spansion,s25sl12801; + reg = 0; + spi-max-frequency = 4000; /* input clock */ + partition@u-boot { + label = SPI U-Boot; + reg = 0x 0x0010; + read-only; + }; + partition@kernel { + label = SPI Kernel; + reg = 0x0010 0x0050; + read-only; + }; + partition@dtb { + label = SPI DTB; + reg = 0x0060 0x0010; + read-only; + }; + partition@fs { + label = SPI File System; + reg = 0x0070 0x0090; + }; These are not valid unit addresses -- and the kernel/dtb should not be read-only. Please consider fixing where you copied this from, as well. Do you really want to dedicate a whole mebibyte to the dtb, given that the flash is only 16 MiB total? Actually, let's please just stop putting partitions in the dts. Either use mtdparts on the command line, or have U-Boot fill in the partition info (there is code in U-Boot to do this based on the mtdparts env variable). diff --git a/arch/powerpc/include/asm/mpc85xx.h b/arch/powerpc/include/asm/mpc85xx.h index 736d4ac..3bef74a 100644 --- a/arch/powerpc/include/asm/mpc85xx.h +++ b/arch/powerpc/include/asm/mpc85xx.h @@ -77,6 +77,8 @@ #define SVR_T10200x852100 #define SVR_T10210x852101 #define SVR_T10220x852102 +#define SVR_T20800x853000 +#define SVR_T20810x853100 #define SVR_8610 0x80A000 #define SVR_8641 0x809000 Why is this in the board device tree patch? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] DTS: DMA: Fix DMA3 interrupts
On Tue, 2013-12-17 at 16:48 +0800, Li Yang wrote: On Wed, Dec 11, 2013 at 2:33 AM, Scott Wood scottw...@freescale.com wrote: On Tue, 2013-12-10 at 18:33 +0800, Hongbo Zhang wrote: Scott, This issue is due to the non-continuous MPIC register, I think there is two ways to fix it. The first one is as what we are discussing, in fact the Bman/Qman DT author had introduced this way, and I had to follow it, it is a trick, adding 208 is a bit ugly I think, and even difficult to explain it to customers etc, but this way changes less codes. The second one is editing MPIC related codes without adding 208 to high interrupts. The point of translate interrupt number to MPIC register address is a so called 'isu' mechanism, we can do like the following example codes, then the tricky adding 208 isn't needed any more. Which one do you prefer? In fact I myself prefer the second, if the idea is acceptable, I will send a patch instead of this one. (and also alone with the internal patch decreasing 208 for the Bman/Qman) void __init corenet_ds_pic_init(void) { .. mpic = mpic_alloc(NULL, 0, flags, 0, 512, OpenPIC); BUG_ON(mpic == NULL); // Add this start for (i = 0; i 17; i++) { if (i 11) addr_off = 0x1 + 0x20 * 16 * i; else addr_off = 0x13000 + 0x20 * 16 * (i - 11); /* scape the address not for interrupts */ mpic_assign_isu(mpic, i, mpic-paddr + addr_off); } // Add this end mpic_init(mpic); } NACK We already have a binding that states that the interrupt number is based on the register offset, rather than whatever arbitrary numbers hardware documenters decide to use next week. While I'm not terribly happy with the usability of this, especially now that it's not a simple add 16, redefining the existing binding is not OK (and in any case the code above seems obfuscatory). If we decide to do something other than continue with register offset divided by 32, then we need to define a new interrupt type (similar to current defined types of error interrupt, timer, and IPI) for the new numberspace -- and it should be handled when decoding that type of interrupt specifier, rather than with the isu mechanism. I had to say that the current binding is terribly confusing. I know a lot of people who were confused while looking into the device tree and I had to help them out now and then. I even got confused for some time at the very beginning. :( If we want to keep the current binding, a big warning message is well deserved at the very beginning of the binding document to clarify that the interrupt number used in device tree has nothing to do with the interrupt number mentioned in the silicon reference manuals. I think we should even mention the add 16 magic rule also to make it more intuitive. I suggested some explanatory text in another thread. Adding a new interrupt type for external interrupts is good to make the interrupt number consistent with the Reference Manuals. But doing that requires changing all the device trees that used the previous binding, We may want to change them all for clarity, but there's no compatibility issue. Old device trees must continue to be supported. and we need a mechanism of judging which binding a device tree is complying to. If it uses the new interrupt type number, then it's the new binding. Otherwise, it's the old. The binding would only be added to, not changed. I'm not convinced it's worth adding at this point, though -- let's add some helpful text to the binding and see how much that helps. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian
From: Paul E. McKenney paul...@linux.vnet.ibm.com The powerpc 64-bit __copy_tofrom_user() function uses shifts to handle unaligned invocations. However, these shifts were designed for big-endian systems: On little-endian systems, they must shift in the opposite direction. This commit relies on the C preprocessor to insert the correct shifts into the assembly code. [ This is a rare but nasty LE issue. Most of the time we use the POWER7 optimised __copy_tofrom_user_power7 loop, but when it hits an exception we fall back to the base __copy_tofrom_user loop. - Anton ] Signed-off-by: Paul E. McKenney paul...@linux.vnet.ibm.com Signed-off-by: Anton Blanchard an...@samba.org --- diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index d73a590..596a285 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -9,6 +9,14 @@ #include asm/processor.h #include asm/ppc_asm.h +#ifdef __BIG_ENDIAN__ +#define sLd sld/* Shift towards low-numbered address. */ +#define sHd srd/* Shift towards high-numbered address. */ +#else +#define sLd srd/* Shift towards low-numbered address. */ +#define sHd sld/* Shift towards high-numbered address. */ +#endif + .align 7 _GLOBAL(__copy_tofrom_user) BEGIN_FTR_SECTION @@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 24:ld r9,0(r4)/* 3+2n loads, 2+2n stores */ 25:ld r0,8(r4) - sld r6,r9,r10 + sLd r6,r9,r10 26:ldu r9,16(r4) - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 or r7,r7,r6 blt cr6,79f 27:ld r0,8(r4) @@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) 28:ld r0,0(r4)/* 4+2n loads, 3+2n stores */ 29:ldu r9,8(r4) - sld r8,r0,r10 + sLd r8,r0,r10 addir3,r3,-8 blt cr6,5f 30:ld r0,8(r4) - srd r12,r9,r11 - sld r6,r9,r10 + sHd r12,r9,r11 + sLd r6,r9,r10 31:ldu r9,16(r4) or r12,r8,r12 - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 addir3,r3,16 beq cr6,78f 1: or r7,r7,r6 32:ld r0,8(r4) 76:std r12,8(r3) -2: srd r12,r9,r11 - sld r6,r9,r10 +2: sHd r12,r9,r11 + sLd r6,r9,r10 33:ldu r9,16(r4) or r12,r8,r12 77:stdur7,16(r3) - srd r7,r0,r11 - sld r8,r0,r10 + sHd r7,r0,r11 + sLd r8,r0,r10 bdnz1b 78:std r12,8(r3) or r7,r7,r6 79:std r7,16(r3) -5: srd r12,r9,r11 +5: sHd r12,r9,r11 or r12,r8,r12 80:std r12,24(r3) bne 6f @@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) blr 6: cmpwi cr1,r5,8 addir3,r3,32 - sld r9,r9,r10 + sLd r9,r9,r10 ble cr1,7f 34:ld r0,8(r4) - srd r7,r0,r11 + sHd r7,r0,r11 or r9,r7,r9 7: bf cr7*4+1,1f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,32 +#endif 94:stw r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,32 +#endif addir3,r3,4 1: bf cr7*4+2,2f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,16 +#endif 95:sth r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,16 +#endif addir3,r3,2 2: bf cr7*4+3,3f +#ifdef __BIG_ENDIAN__ rotldi r9,r9,8 +#endif 96:stb r9,0(r3) +#ifdef __LITTLE_ENDIAN__ + rotrdi r9,r9,8 +#endif 3: li r3,0 blr ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2] powerpc 8xx: Loading kernels over 8Mbytes without CONFIG_PIN_TLB
On Tue, 2013-12-17 at 06:54 +0100, leroy christophe wrote: Le 16/12/2013 23:57, Scott Wood a écrit : On Wed, 2013-12-11 at 00:36 +0100, leroy christophe wrote: Le 11/12/2013 00:18, Scott Wood a écrit : There wasn't previously an ifdef specifically around the setting of SPRN_MD_CTR. That's new. There was an ifdef around the entire block, which has gone away because you are now trying to map more than 8M regardless of CONFIG_PIN_TLB, but that has nothing to do with whether there should be an ifdef around SPRN_MD_CTR. Euh, ok, but then we have to fix it in the whole function, not only in this block. Do you think it is worth doing it ? Fix what in the whole function? I was asking what harm there would be if you just remove all the CONFIG_PIN_TLB ifdefs except around the actual RSV4I setting -- do we really care what value goes in MD_CTR for the non-pinned case, as long as it's different for each entry? MD_CTR is decremented after each entry added. However, the function populates entry 28, then 29, then 30, then 31. At the end MD_CTR has then value 30, ready to overide entry 30 then 29 then 28 then 27 . So I will remove all the CONFIG_PIN_TLB, but I'll also have to fix the value set in MD_CTR to start from 31, won't I ? OK, so the answer is that we rely on autodecrement avoiding entries over 28 in the CONFIG_PIN_TLB case. Leave the ifdefs, then. Do you have any comment/recommendation on my tentative v3 patch where I have tried to implement based on the use of r7 as you recommended ? I haven't reviewed it yet. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V4 09/10] power8, perf: Change BHRB branch filter configuration
On Fri, 2013-12-13 at 13:50 +0530, Anshuman Khandual wrote: On 12/09/2013 11:51 AM, Michael Ellerman wrote: As I said in my comments on version 3 which you ignored: I think it would be clearer if we actually checked for the possibilities we allow and let everything else fall through, eg: Â Â Â Â Â Â Â Â /* Ignore user/kernel/hv bits */ Â Â Â Â Â Â Â Â branch_sample_type = ~PERF_SAMPLE_BRANCH_PLM_ALL; Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY) Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return 0; Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL) Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return POWER8_MMCRA_IFM1; Â Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_COND) Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return POWER8_MMCRA_IFM3; Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return -1; Hey Michael, This patch only adds support for the PERF_SAMPLE_BRANCH_COND filter, if the over all code flow does not clearly suggest that all combinations of any of these HW filters are invalid, then we can go with one more patch to clean that up before or after this patch but not here in this patch. Finally the code section here will look something like this. Does it sound good ? Better, but not quite. static u64 power8_bhrb_filter_map(u64 branch_sample_type) { u64 pmu_bhrb_filter = 0; /* BHRB and regular PMU events share the same privilege state * filter configuration. BHRB is always recorded along with a * regular PMU event. As the privilege state filter is handled * in the basic PMC configuration of the accompanying regular * PMU event, we ignore any separate BHRB specific request. */ /* Ignore user, kernel, hv bits */ branch_sample_type = ~PERF_SAMPLE_BRANCH_PLM_ALL; if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY) return pmu_bhrb_filter; return 0; if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL) { pmu_bhrb_filter |= POWER8_MMCRA_IFM1; return pmu_bhrb_filter; return POWER8_MMCRA_IFM1; } if (branch_sample_type == PERF_SAMPLE_BRANCH_COND) { pmu_bhrb_filter |= POWER8_MMCRA_IFM3; return pmu_bhrb_filter; return POWER8_MMCRA_IFM3; } /* Every thing else is unsupported */ return -1; } cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: Fix bad stack check in exception entry
On Mon, 2013-12-16 at 15:12 +1100, Michael Neuling wrote: In EXCEPTION_PROLOG_COMMON() we check to see if the stack pointer (r1) is valid when coming from the kernel. If it's not valid, we die but with a nice oops message. Currently we allocate a stack frame (subtract INT_FRAME_SIZE) before we check to see if the stack pointer is negative. Unfortunately, this won't detect a bad stack where r1 is less than INT_FRAME_SIZE. The key detail being that we *expect* a negative value, because kernel addresses (0xc000) are negative. cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/2] power7, perf: Make some new raw event codes available in sysfs
On Fri, 2013-12-13 at 10:00 +0530, Anshuman Khandual wrote: On 12/13/2013 08:20 AM, Michael Ellerman wrote: On Wed, 2013-10-16 at 11:22 +0530, Anshuman Khandual wrote: This patch adds some more raw event codes into the existing list of event codes present in power7-events-list.h file. This tries to complete the list of events supported in Power7 and matches the raw event list with libpfm4 library. It's a bit annoying, but you also need to update the ABI document: What is annoying ? you need to be specific. It's annoying that we have to update the ABI document. Documentation/ABI/testing/sysfs-bus-event_source-devices-events The events listed under the following heading are events required to do CPI analysis. No they are not, it doesn't say that anywhere in the file. It happens that the events in there *now* are the CPI events, but that's because they are the only ones that have been added. Description:POWER-systems specific performance monitoring event /sys/devices/cpu/events/PM_1PLUS_PPC_CMPL /sys/devices/cpu/events/PM_BRU_FIN /sys/devices/cpu/events/PM_BR_MPRED /sys/devices/cpu/events/PM_CMPLU_STALL All events that appear in /sys/devices/cpu/events on powerpc should be listed in the file. cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc, perf: Configure BHRB filter before enabling PMU interrupts
From: Anshuman Khandual khand...@linux.vnet.ibm.com Right now the config_bhrb() PMU specific call happens after write_mmcr0(), which actually enables the PMU for event counting and interrupts. So there is a small window of time where the PMU and BHRB runs without the required HW branch filter (if any) enabled in BHRB. This can cause some of the branch samples to be collected through BHRB without any filter applied and hence affects the correctness of the results. This patch moves the BHRB config function call before enabling interrupts. Here are some data points captured via trace prints which depicts how we could get PMU interrupts with BHRB filter NOT enabled with a standard perf record command line (asking for branch record information as well). $ perf record -j any_call ls Before the patch:- ls-1962 [003] d... 2065.299590: .perf_event_interrupt: MMCRA: 400 ls-1962 [003] d... 2065.299603: .perf_event_interrupt: MMCRA: 400 ... All the PMU interrupts before this point did not have the requested HW branch filter enabled in the MMCRA. ls-1962 [003] d... 2065.299647: .perf_event_interrupt: MMCRA: 4004000 ls-1962 [003] d... 2065.299662: .perf_event_interrupt: MMCRA: 4004000 After the patch:- ls-1850 [008] d... 190.311828: .perf_event_interrupt: MMCRA: 4004000 ls-1850 [008] d... 190.311848: .perf_event_interrupt: MMCRA: 4004000 All the PMU interrupts have the requested HW BHRB branch filter enabled in MMCRA. Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com [mpe: Fixed up whitespace and cleaned up changelog] Signed-off-by: Michael Ellerman m...@ellerman.id.au --- arch/powerpc/perf/core-book3s.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) Ben this is a bug fix so would be nice for 3.13. diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 29b89e8..67cf220 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1147,6 +1147,9 @@ static void power_pmu_enable(struct pmu *pmu) mmcr0 = ebb_switch_in(ebb, cpuhw-mmcr[0]); mb(); + if (cpuhw-bhrb_users) + ppmu-config_bhrb(cpuhw-bhrb_filter); + write_mmcr0(cpuhw, mmcr0); /* @@ -1158,8 +1161,6 @@ static void power_pmu_enable(struct pmu *pmu) } out: - if (cpuhw-bhrb_users) - ppmu-config_bhrb(cpuhw-bhrb_filter); local_irq_restore(flags); } -- 1.8.3.2 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2] powerpc/powernv: Framework to log critical errors on powernv.
On Mon, 2013-12-16 at 18:00 +0530, Deepthi Dharwar wrote: This patch provides error logging interfaces to report critical powernv error to FSP. All the required information to dump the error is collected at POWERNV level through error log interfaces and then pushed on to FSP. This also supports synchronous error logging in cases of PANIC, by passing OPAL_ERROR_PANIC as severity in elog_create() call. Please make note of the fact that none of this code is currently used but will be in a subsequent patch. When can we expect those patches? diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 0f01308..1c5440a 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -168,6 +168,7 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_GET_MSG 85 #define OPAL_CHECK_ASYNC_COMPLETION 86 #define OPAL_SYNC_HOST_REBOOT87 +#define OPAL_ELOG_SEND 88 #ifndef __ASSEMBLY__ @@ -260,6 +261,122 @@ enum OpalMessageType { OPAL_MSG_TYPE_MAX, }; +/* Classification of Error/Events reporting type classification Standard comment style for block comments is: /* * Classification ... */ That applies to almost all of your comments in here. + * Platform Events/Errors: Report Machine Check Interrupt I think these comments would be better inline with the values, eg: /* Report Machine Check Interrupt */ OPAL_PLATFORM, /* Report all I/O related events/errors */ OPAL_INPUT_OUTPUT, etc. Again that applies to most of your comments. + * INPUT_OUTPUT: Report all I/O related events/errors + * RESOURCE_DEALLOC: Hotplug events and errors + * MISC: Miscellanous error + * Field: error_events_type What is this Field: thing about? + */ +enum error_events { If you're going to define an enum you should actually use it in the API, I can't see anywhere you do? If you do want to use an enum it should be opal_error_events. + OPAL_PLATFORM, + OPAL_INPUT_OUTPUT, + OPAL_RESOURCE_DEALLOC, + OPAL_MISC, +}; + +/* OPAL Subsystem IDs listed for reporting events/errors + * Field: subsystem_id + */ + +#define OPAL_PROCESSOR_SUBSYSTEM0x10 +#define OPAL_MEMORY_SUBSYSTEM 0x20 +#define OPAL_IO_SUBSYSTEM 0x30 +#define OPAL_IO_DEVICES 0x40 +#define OPAL_CEC_HARDWARE 0x50 +#define OPAL_POWER_COOLING 0x60 +#define OPAL_MISC_SUBSYSTEM 0x70 +#define OPAL_SURVEILLANCE_ERR 0x7A +#define OPAL_PLATFORM_FIRMWARE 0x80 +#define OPAL_SOFTWARE 0x90 +#define OPAL_EXTERNAL_ENV 0xA0 + +/* During reporting an event/error the following represents + * how serious the logged event/error is. (Severity) + * Field: event_sev + */ +#define OPAL_INFO 0x00 +#define OPAL_RECOVERED_ERR_GENERAL 0x10 + +/* 0x2X series is to denote set of Predictive Error + * 0x20 Generic predictive error + * 0x21 Predictive error, degraded performance + * 0x22 Predictive error, fault may be corrected after reboot + * 0x23 Predictive error, fault may be corrected after reboot, + * degraded performance + * 0x24 Predictive error, loss of redundancy + */ +#define OPAL_PREDICTIVE_ERR_GENERAL 0x20 +#define OPAL_PREDICTIVE_ERR_DEGRADED_PERF 0x21 +#define OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT0x22 +#define OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_BOOT_DEGRADE_PERF 0x23 +#define OPAL_PREDICTIVE_ERR_LOSS_OF_REDUNDANCY 0x24 + +/* 0x4X series for Unrecoverable Error + * 0x40 Generic Unrecoverable error + * 0x41 Unrecoverable error bypassed with degraded performance + * 0x44 Unrecoverable error bypassed with loss of redundancy + * 0x45 Unrecoverable error bypassed with loss of redundancy and performance + * 0x48 Unrecoverable error bypassed with loss of function + */ +#define OPAL_UNRECOVERABLE_ERR_GENERAL 0x40 +#define OPAL_UNRECOVERABLE_ERR_DEGRADE_PERF 0x41 +#define OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY 0x44 +#define OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY_PERF 0x45 +#define OPAL_UNRECOVERABLE_ERR_LOSS_OF_FUNCTION 0x48 +#define OPAL_ERROR_PANIC0x50 + +/* Event Sub-type + * This field provides additional information on the non-error + * event type + * Field: event_subtype + */ +#define OPAL_NA 0x00 +#define OPAL_MISCELLANEOUS_INFO_ONLY0x01 +#define OPAL_PREV_REPORTED_ERR_RECTIFIED0x10 +#define OPAL_SYS_RESOURCES_DECONFIG_BY_USER 0x20 +#define OPAL_SYS_RESOURCE_DECONFIG_PRIOR_ERR0x21 +#define OPAL_RESOURCE_DEALLOC_EVENT_NOTIFY
Re: [v6][PATCH 2/5] powerpc/book3e: store crit/mc/dbg exception thread info
On Wed, 2013-10-23 at 17:31 +0800, Tiejun Chen wrote: We need to store thread info to these exception thread info like something we already did for PPC32. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/exceptions-64e.S | 22 +++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 68d74b4..a55cf62 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -36,6 +36,19 @@ */ #define SPECIAL_EXC_FRAME_SIZE INT_FRAME_SIZE +/* Now we only store something to exception thread info */ +#define EXC_LEVEL_EXCEPTION_PROLOG(type) \ + ld r14,PACAKSAVE(r13); \ + CURRENT_THREAD_INFO(r14, r14); \ + CURRENT_THREAD_INFO(r15, r1); \ + ld r10,TI_FLAGS(r14); \ + std r10,TI_FLAGS(r15); \ + ld r10,TI_PREEMPT(r14);\ + std r10,TI_PREEMPT(r15);\ + ld r10,TI_TASK(r14); \ + std r10,TI_TASK(r15); + + /* Exception prolog code for all exceptions */ #define EXCEPTION_PROLOG(n, intnum, type, addition) \ mtspr SPRN_SPRG_##type##_SCRATCH,r13; /* get spare registers */ \ @@ -69,19 +82,22 @@ #define CRIT_SET_KSTACK \ ld r1,PACA_CRIT_STACK(r13);\ - subir1,r1,SPECIAL_EXC_FRAME_SIZE; + subir1,r1,SPECIAL_EXC_FRAME_SIZE; \ + EXC_LEVEL_EXCEPTION_PROLOG(CRIT); #define SPRN_CRIT_SRR0 SPRN_CSRR0 #define SPRN_CRIT_SRR1 SPRN_CSRR1 #define DBG_SET_KSTACK \ ld r1,PACA_DBG_STACK(r13); \ - subir1,r1,SPECIAL_EXC_FRAME_SIZE; + subir1,r1,SPECIAL_EXC_FRAME_SIZE; \ + EXC_LEVEL_EXCEPTION_PROLOG(DBG); #define SPRN_DBG_SRR0SPRN_DSRR0 #define SPRN_DBG_SRR1SPRN_DSRR1 #define MC_SET_KSTACK \ ld r1,PACA_MC_STACK(r13); \ - subir1,r1,SPECIAL_EXC_FRAME_SIZE; + subir1,r1,SPECIAL_EXC_FRAME_SIZE; \ + EXC_LEVEL_EXCEPTION_PROLOG(MC); #define SPRN_MC_SRR0 SPRN_MCSRR0 #define SPRN_MC_SRR1 SPRN_MCSRR1 We should rename these if they're going to do more than set up a stack. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v6][PATCH 5/5] powerpc/book3e/kgdb: Fix a single stgep case of lazy IRQ
On Wed, 2013-10-23 at 17:31 +0800, Tiejun Chen wrote: In lazy EE magic, we may have a lazy interrupt occured while entering kgdb, but we really don't want to replay that interrupt for kgdb, so we have to clear the PACA_IRQ_HARD_DIS force to make sure we can exit directly from this debug exception. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com s/stgep/step/ in subject --- arch/powerpc/kernel/kgdb.c |8 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 447c14b..9872f58 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -185,6 +185,14 @@ static int kgdb_singlestep(struct pt_regs *regs) /* Restore current_thread_info lastly. */ memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info); +#ifdef CONFIG_PPC64 + /* + * Clear the PACA_IRQ_HARD_DIS from the pending mask + * since we are about to exit this directly from debug + * exception without any replay interrupt in lazy EE case. + */ + local_paca-irq_happened = ~PACA_IRQ_HARD_DIS; +#endif return 1; } What happens to those interrupts you discarded once we get back to a state when they can be safely replayed? I don't think just dropping them is the answer. I'm not sure what the actual problem is. I can understand not wanting kgdb to cause interrupts to appear to run when the interrupted context has external interrupts disabled, but the replay code in entry_64.S doesn't run if interrupts are soft-disabled in the context to be returned to. What harm does it cause to run the interrupts if we're returning to an EE=1 context? Does KGDB enable interrupts in its handler? If not, how do we even get into the situation where there are interrupts pending when the interrupted context has EE soft-enabled (i.e. we went directly from a context where the interrupt handler should have run, to a hard-disabled context)? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 1/8] powerpc/book3e: rename interrupt_end_book3e with __end_interrupts
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote: We can rename 'interrupt_end_book3e' with '__end_interrupts' then book3s/book3e can share this unique label to make sure we can use this conveniently. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com What users of this do you plan to share between book3s and book3e? I'm not seeing any existing book3s users that are obviously applicable to book3e -- they mainly involve copying exception vectors which we shouldn't need to do. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 2/8] powerpc/book3e: support CONFIG_RELOCATABLE
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote: book3e is different with book3s since 3s includes the exception vectors code in head_64.S as it relies on absolute addressing which is only possible within this compilation unit. So we have to get that label address with got. And when boot a relocated kernel, we should reset ipvr properly again after .relocate. ivpr? Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/exception-64e.h | 11 +++ arch/powerpc/kernel/exceptions-64e.S | 18 +- arch/powerpc/kernel/head_64.S| 25 + 3 files changed, 53 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index 51fa43e..371a77f 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -214,10 +214,21 @@ exc_##label##_book3e: #define TLB_MISS_STATS_SAVE_INFO_BOLTED #endif +#ifndef CONFIG_RELOCATABLE Please use positive logic (ifdef/else, rather than ifndef/else). #define SET_IVOR(vector_number, vector_offset) \ li r3,vector_offset@l; \ ori r3,r3,interrupt_base_book3e@l; \ mtspr SPRN_IVOR##vector_number,r3; +#else /* !CONFIG_RELOCATABLE */ +/* In relocatable case the value of the constant expression 'expr' is only + * offset. So instead, we should loads the address of label 'name'. + */ +#define SET_IVOR(vector_number, vector_offset) \ + LOAD_REG_ADDR(r3,interrupt_base_book3e);\ + rlwinm r3,r3,0,15,0; \ + ori r3,r3,vector_offset@l; \ + mtspr SPRN_IVOR##vector_number,r3; +#endif /* CONFIG_RELOCATABLE */ Please use the more readable 4-operand version of rlwinm. Is there a reason why this new code is only used with CONFIG_RELOCATABLE? If @got doesn't work without CONFIG_RELOCATABLE, then the ifdef should be pushed into LOAD_REG_ADDR. Likewise with other ifdefs on CONFIG_RELOCATABLE. -_STATIC(init_core_book3e) +_GLOBAL(init_core_book3e) /* Establish the interrupt vector base */ +#ifdef CONFIG_RELOCATABLE +/* In relocatable case the value of the constant expression 'expr' is only + * offset. So instead, we should loads the address of label 'name'. + */ + tovirt(r2,r2) + LOAD_REG_ADDR(r3, interrupt_base_book3e) +#else LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e) +#endif I'm having a hard time parsing the comment. Plus, it feels wrong to decouple tovirt(r2,r2) from the call to relative_toc. mtspr SPRN_IVPR,r3 sync blr diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index b61363d..550f8fb 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -414,12 +414,25 @@ _STATIC(__after_prom_start) /* process relocations for the final address of the kernel */ lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */ sldir25,r25,32 +#if defined(CONFIG_PPC_BOOK3E) + tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ +#endif lwz r7,__run_at_load-_stext(r26) +#if defined(CONFIG_PPC_BOOK3E) + tophys(r26,r26) /* Restore for the remains. */ +#endif cmplwi cr0,r7,1/* flagged to stay where we are ? */ bne 1f add r25,r25,r26 1: mr r3,r25 bl .relocate +#if defined(CONFIG_PPC_BOOK3E) + /* In relocatable case we always have to load the address of label 'name' + * to set IVPR. So after .relocate we have to update IVPR with current + * address of label. + */ + bl .init_core_book3e +#endif Maybe this function should be renamed to something ivpr-specific, so nothing else gets added there. #endif /* @@ -447,12 +460,24 @@ _STATIC(__after_prom_start) * variable __run_at_load, if it is set the kernel is treated as relocatable * kernel, otherwise it will be moved to PHYSICAL_START */ +#if defined(CONFIG_PPC_BOOK3E) + tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ +#endif lwz r7,__run_at_load-_stext(r26) +#if defined(CONFIG_PPC_BOOK3E) + tophys(r26,r26) /* Restore for the remains. */ +#endif cmplwi cr0,r7,1 bne 3f +#ifdef CONFIG_PPC_BOOK3E + LOAD_REG_ADDR(r5, __end_interrupts) + LOAD_REG_ADDR(r11, _stext) + sub r5,r5,r11 +#else /* just copy interrupts */ LOAD_REG_IMMEDIATE(r5, __end_interrupts - _stext) +#endif Can't we skip the interrupt copying on book3e? And if not for some reason, why start at _stext? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 3/8] book3e/kexec/kdump: enable kexec for kernel
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote: We need to active KEXEC for book3e and bypass or convert non-book3e stuff in kexec coverage. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/Kconfig |2 +- arch/powerpc/kernel/machine_kexec_64.c | 148 ++-- arch/powerpc/kernel/misc_64.S |6 ++ 3 files changed, 89 insertions(+), 67 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5374776..d945435 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -357,7 +357,7 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE config KEXEC bool kexec system call - depends on (PPC_BOOK3S || FSL_BOOKE || (44x !SMP)) + depends on (PPC_BOOK3S || FSL_BOOKE || (44x !SMP)) || PPC_BOOK3E Please remove the outher parentheses, and especially don't put PPC_BOOK3E on the outside of them when there's no reason to group the other items together. @@ -367,6 +301,87 @@ void default_machine_kexec(struct kimage *image) /* NOTREACHED */ } +#ifdef CONFIG_PPC_BOOK3E +int default_machine_kexec_prepare(struct kimage *image) +{ + int i; + /* + * Since we use the kernel fault handlers and paging code to + * handle the virtual mode, we must make sure no destination + * overlaps kernel static data or bss. + */ + for (i = 0; i image-nr_segments; i++) + if (image-segment[i].mem __pa(_end)) + return -ETXTBSY; + return 0; Factor out this common code rather than duplicate it. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 4/8] book3e/kexec/kdump: create a 1:1 TLB mapping
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote: book3e have no real MMU mode so we have to create a 1:1 TLB mapping to make sure we can access the real physical address. And correct something to support this pseudo real mode on book3e. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com Why do we need to be able to directly access physical addresses? diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index f1a7ce7..20cbb98 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -460,6 +460,49 @@ kexec_flag: #ifdef CONFIG_KEXEC +#ifdef CONFIG_PPC_BOOK3E +/* BOOK3E have no a real MMU mode so we have to setup the initial TLB + * for a core to map v:0 to p:0 as 1:1. This current implementation + * assume that 1G is enough for kexec. + */ +#include asm/mmu.h #includes go at the top of the file. +kexec_create_tlb: + /* Invalidate all TLBs to avoid any TLB conflict. */ + PPC_TLBILX_ALL(0,R0) + sync + isync + + mfspr r10,SPRN_TLB1CFG + andi. r10,r10,TLBnCFG_N_ENTRY /* Extract # entries */ + subir10,r10,1 /* Often its always safe to use last */ + lis r9,MAS0_TLBSEL(1)@h + rlwimi r9,r10,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r9) */ Hardcoding TLB1 makes this FSL-specific code, but you've put it in a non-FSL-specific place. +/* Setup a temp mapping v:0 to p:0 as 1:1 and return to it. + */ +#ifdef CONFIG_SMP +#define M_IF_SMP MAS2_M +#else +#define M_IF_SMP 0 +#endif + mtspr SPRN_MAS0,r9 + + lis r9,(MAS1_VALID|MAS1_IPROT)@h + ori r9,r9,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l + mtspr SPRN_MAS1,r9 What if the machine has less than 1 GiB of RAM? We could get speculative accesses to non-present addresses. Though it looks like the normal 64-bit init sequence has the same problem... -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc, perf: Configure BHRB filter before enabling PMU interrupts
On 12/18/2013 07:44 AM, Michael Ellerman wrote: From: Anshuman Khandual khand...@linux.vnet.ibm.com Right now the config_bhrb() PMU specific call happens after write_mmcr0(), which actually enables the PMU for event counting and interrupts. So there is a small window of time where the PMU and BHRB runs without the required HW branch filter (if any) enabled in BHRB. This can cause some of the branch samples to be collected through BHRB without any filter applied and hence affects the correctness of the results. This patch moves the BHRB config function call before enabling interrupts. Here are some data points captured via trace prints which depicts how we could get PMU interrupts with BHRB filter NOT enabled with a standard perf record command line (asking for branch record information as well). $ perf record -j any_call ls Before the patch:- ls-1962 [003] d... 2065.299590: .perf_event_interrupt: MMCRA: 400 ls-1962 [003] d... 2065.299603: .perf_event_interrupt: MMCRA: 400 ... All the PMU interrupts before this point did not have the requested HW branch filter enabled in the MMCRA. ls-1962 [003] d... 2065.299647: .perf_event_interrupt: MMCRA: 4004000 ls-1962 [003] d... 2065.299662: .perf_event_interrupt: MMCRA: 4004000 After the patch:- ls-1850 [008] d... 190.311828: .perf_event_interrupt: MMCRA: 4004000 ls-1850 [008] d... 190.311848: .perf_event_interrupt: MMCRA: 4004000 All the PMU interrupts have the requested HW BHRB branch filter enabled in MMCRA. Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com [mpe: Fixed up whitespace and cleaned up changelog] Thanks Michael for cleaning and resending this out. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 5/8] book3e/kexec/kdump: introduce a kexec kernel flag
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote: We need to introduce a flag to indicate we're already running a kexec kernel then we can go proper path. For example, We shouldn't access spin_table from the bootloader to up any secondary cpu for kexec kernel, and kexec kernel already know how to jump to generic_secondary_smp_init. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/smp.h|1 + arch/powerpc/kernel/head_64.S | 10 ++ arch/powerpc/kernel/misc_64.S |6 ++ arch/powerpc/platforms/85xx/smp.c | 20 +++- 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index ffbaabe..59165a3 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -200,6 +200,7 @@ extern void generic_secondary_thread_init(void); extern unsigned long __secondary_hold_spinloop; extern unsigned long __secondary_hold_acknowledge; extern char __secondary_hold; +extern unsigned long __run_at_kexec; extern void __early_start(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 7dc56be..0b46c9d 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -89,6 +89,10 @@ __secondary_hold_spinloop: __secondary_hold_acknowledge: .llong 0x0 + .globl __run_at_kexec +__run_at_kexec: + .llong 0x0 /* Flag for the secondary kernel from kexec. */ + No leading underscores please -- and why does this need to be 64-bit? #ifdef CONFIG_RELOCATABLE /* This flag is set to 1 by a loader if the kernel should run * at the loaded address instead of the linked address. This @@ -417,6 +421,12 @@ _STATIC(__after_prom_start) #if defined(CONFIG_PPC_BOOK3E) tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ #endif +#if defined(CONFIG_KEXEC) || defined(CONFIG_CRASH_DUMP) + /* If relocated we need to restore this flag on that relocated address. */ + ld r7,__run_at_kexec-_stext(r26) + std r7,__run_at_kexec-_stext(r26) +#endif + lwz r7,__run_at_load-_stext(r26) #if defined(CONFIG_PPC_BOOK3E) tophys(r26,r26) /* Restore for the remains. */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 20cbb98..c89aead 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -619,6 +619,12 @@ _GLOBAL(kexec_sequence) bl .copy_and_flush /* (dest, src, copy limit, start offset) */ 1: /* assume normal blr return */ + /* notify we're going into kexec kernel for SMP. */ + LOAD_REG_ADDR(r3,__run_at_kexec) + li r4,1 + std r4,0(r3) + sync + /* release other cpus to the new kernel secondary start at 0x60 */ mflrr5 li r6,1 diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 5ced4f5..14d461b 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -150,6 +150,9 @@ static int smp_85xx_kick_cpu(int nr) int hw_cpu = get_hard_smp_processor_id(nr); int ioremappable; int ret = 0; +#ifdef CONFIG_PPC64 + unsigned long *ptr = NULL; +#endif Looks like an unnecessary initialization. WARN_ON(nr 0 || nr = NR_CPUS); WARN_ON(hw_cpu 0 || hw_cpu = NR_CPUS); @@ -238,11 +241,18 @@ out: #else smp_generic_kick_cpu(nr); - flush_spin_table(spin_table); - out_be32(spin_table-pir, hw_cpu); - out_be64((u64 *)(spin_table-addr_h), - __pa((u64)*((unsigned long long *)generic_secondary_smp_init))); - flush_spin_table(spin_table); + ptr = (unsigned long *)((unsigned long)__run_at_kexec); + /* We shouldn't access spin_table from the bootloader to up any + * secondary cpu for kexec kernel, and kexec kernel already + * know how to jump to generic_secondary_smp_init. + */ + if (!*ptr) { + flush_spin_table(spin_table); + out_be32(spin_table-pir, hw_cpu); + out_be64((u64 *)(spin_table-addr_h), + __pa((u64)*((unsigned long long *)generic_secondary_smp_init))); + flush_spin_table(spin_table); + } #endif Please use a more descriptive name than ptr. How is all that different than just: if (!__run_at_kexec) { ... } -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 6/8] book3e/kexec/kdump: implement ppc64 kexec specfic
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote: ppc64 kexec mechanism has a different implementation with ppc32. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com Could you describe the relevant differences? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 7/8] book3e/kexec/kdump: redefine VIRT_PHYS_OFFSET
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote: Book3e is always aligned 1GB to create TLB so we should use (KERNELBASE - MEMORY_START) as VIRT_PHYS_OFFSET to get __pa/__va properly while boot kdump. What if MEMORY_START - PHYSICAL_START = 1 GiB? What about the comment that says we can't use MEMORY_START before parsing the device tree? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v3][PATCH 8/8] book3e/kexec/kdump: recover r4 = 0 to create the initial TLB
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote: In commit 96f013f, powerpc/kexec: Add kexec hold support for Book3e processors, requires that GPR4 survive the hold process, for IBM Blue Gene/Q with with some very strange firmware. But for FSL Book3E, r4 = 1 to indicate that the initial TLB entry for this core already exists so we still should set r4 with 0 to create that initial TLB. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/kernel/head_64.S |4 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 0b46c9d..d546c5e 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -127,6 +127,10 @@ __secondary_hold: /* Grab our physical cpu number */ mr r24,r3 /* stash r4 for book3e */ +#ifdef CONFIG_PPC_FSL_BOOK3E + /* we need to setup initial TLB entry. */ + li r4,0 +#endif mr r25,r4 This breaks being able to build one kernel that supports both FSL book3e and IBM book3e. -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH V4 09/10] power8, perf: Change BHRB branch filter configuration
On 12/18/2013 05:38 AM, Michael Ellerman wrote: On Fri, 2013-12-13 at 13:50 +0530, Anshuman Khandual wrote: On 12/09/2013 11:51 AM, Michael Ellerman wrote: As I said in my comments on version 3 which you ignored: I think it would be clearer if we actually checked for the possibilities we allow and let everything else fall through, eg: Â Â Â Â Â Â Â Â /* Ignore user/kernel/hv bits */ Â Â Â Â Â Â Â Â branch_sample_type = ~PERF_SAMPLE_BRANCH_PLM_ALL; Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY) Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return 0; Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL) Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return POWER8_MMCRA_IFM1; Â Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_COND) Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return POWER8_MMCRA_IFM3; Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return -1; Hey Michael, This patch only adds support for the PERF_SAMPLE_BRANCH_COND filter, if the over all code flow does not clearly suggest that all combinations of any of these HW filters are invalid, then we can go with one more patch to clean that up before or after this patch but not here in this patch. Finally the code section here will look something like this. Does it sound good ? Better, but not quite. static u64 power8_bhrb_filter_map(u64 branch_sample_type) { u64 pmu_bhrb_filter = 0; /* BHRB and regular PMU events share the same privilege state * filter configuration. BHRB is always recorded along with a * regular PMU event. As the privilege state filter is handled * in the basic PMC configuration of the accompanying regular * PMU event, we ignore any separate BHRB specific request. */ /* Ignore user, kernel, hv bits */ branch_sample_type = ~PERF_SAMPLE_BRANCH_PLM_ALL; if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY) return pmu_bhrb_filter; return 0; if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL) { pmu_bhrb_filter |= POWER8_MMCRA_IFM1; return pmu_bhrb_filter; return POWER8_MMCRA_IFM1; } if (branch_sample_type == PERF_SAMPLE_BRANCH_COND) { pmu_bhrb_filter |= POWER8_MMCRA_IFM3; return pmu_bhrb_filter; return POWER8_MMCRA_IFM3; } /* Every thing else is unsupported */ return -1; } Okay, will take these changes into another patch before adding conditional branch filter here. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v12] PPC: POWERNV: move iommu_add_device earlier
The current implementation of IOMMU on sPAPR does not use iommu_ops and therefore does not call IOMMU API's bus_set_iommu() which 1) sets iommu_ops for a bus 2) registers a bus notifier Instead, PCI devices are added to IOMMU groups from subsys_initcall_sync(tce_iommu_init) which does basically the same thing without using iommu_ops callbacks. However Freescale PAMU driver (https://lkml.org/lkml/2013/7/1/158) implements iommu_ops and when tce_iommu_init is called, every PCI device is already added to some group so there is a conflict. This patch does 2 things: 1. removes the loop in which PCI devices were added to groups and adds explicit iommu_add_device() calls to add devices as soon as they get the iommu_table pointer assigned to them. 2. moves a bus notifier to powernv code in order to avoid conflict with the notifier from Freescale driver. iommu_add_device() and iommu_del_device() are public now. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- Changes: v12: * removed redundant bus notifier from common POWERPC code v11: * rebased on upstream v10: * fixed linker error when IOMMU_API is not enabled v9: * removed KVM from the subject as it is not really a KVM patch so PPC mainainter (hi Ben!) can review/include it into his tree v8: * added the check for iommu_group!=NULL before removing device from a group as suggested by Wei Yang weiy...@linux.vnet.ibm.com v2: * added a helper - set_iommu_table_base_and_group - which does set_iommu_table_base() and iommu_add_device() --- arch/powerpc/include/asm/iommu.h| 26 ++ arch/powerpc/kernel/iommu.c | 41 +++-- arch/powerpc/platforms/powernv/pci-ioda.c | 8 +++--- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 2 +- arch/powerpc/platforms/powernv/pci.c| 31 +- arch/powerpc/platforms/pseries/iommu.c | 8 +++--- 6 files changed, 70 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index c34656a..774fa27 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -101,8 +101,34 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); */ extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); +#ifdef CONFIG_IOMMU_API extern void iommu_register_group(struct iommu_table *tbl, int pci_domain_number, unsigned long pe_num); +extern int iommu_add_device(struct device *dev); +extern void iommu_del_device(struct device *dev); +#else +static inline void iommu_register_group(struct iommu_table *tbl, + int pci_domain_number, + unsigned long pe_num) +{ +} + +static inline int iommu_add_device(struct device *dev) +{ + return 0; +} + +static inline void iommu_del_device(struct device *dev) +{ +} +#endif /* !CONFIG_IOMMU_API */ + +static inline void set_iommu_table_base_and_group(struct device *dev, + void *base) +{ + set_iommu_table_base(dev, base); + iommu_add_device(dev); +} extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl, struct scatterlist *sglist, int nelems, diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 572bb5b..ecbf468 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1105,7 +1105,7 @@ void iommu_release_ownership(struct iommu_table *tbl) } EXPORT_SYMBOL_GPL(iommu_release_ownership); -static int iommu_add_device(struct device *dev) +int iommu_add_device(struct device *dev) { struct iommu_table *tbl; int ret = 0; @@ -1134,46 +1134,13 @@ static int iommu_add_device(struct device *dev) return ret; } +EXPORT_SYMBOL_GPL(iommu_add_device); -static void iommu_del_device(struct device *dev) +void iommu_del_device(struct device *dev) { iommu_group_remove_device(dev); } - -static int iommu_bus_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct device *dev = data; - - switch (action) { - case BUS_NOTIFY_ADD_DEVICE: - return iommu_add_device(dev); - case BUS_NOTIFY_DEL_DEVICE: - iommu_del_device(dev); - return 0; - default: - return 0; - } -} - -static struct notifier_block tce_iommu_bus_nb = { - .notifier_call = iommu_bus_notifier, -}; - -static int __init tce_iommu_init(void) -{ - struct pci_dev *pdev = NULL; - - BUILD_BUG_ON(PAGE_SIZE IOMMU_PAGE_SIZE); - - for_each_pci_dev(pdev) - iommu_add_device(pdev-dev); - - bus_register_notifier(pci_bus_type, tce_iommu_bus_nb); - return 0; -} - -subsys_initcall_sync(tce_iommu_init); +EXPORT_SYMBOL_GPL(iommu_del_device);
Re: [PATCH 8/8] powerpc: Fix endian issues in crash dump code
On Thu, 2013-12-12 at 15:59 +1100, Anton Blanchard wrote: A couple more device tree properties that need byte swapping. Signed-off-by: Anton Blanchard an...@samba.org --- arch/powerpc/kernel/crash_dump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 779a78c..11c1d06 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -124,15 +124,15 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) { unsigned long addr; - const u32 *basep, *sizep; + const __be32 *basep, *sizep; unsigned int rtas_start = 0, rtas_end = 0; basep = of_get_property(rtas.dev, linux,rtas-base, NULL); sizep = of_get_property(rtas.dev, rtas-size, NULL); if (basep sizep) { - rtas_start = *basep; - rtas_end = *basep + *sizep; + rtas_start = be32_to_cpup(basep); + rtas_end = rtas_start + be32_to_cpup(sizep); } for (addr = begin; addr end; addr += PAGE_SIZE) { Not my favourite colour :D What about this instead? We could also add of_property_read_u32(), with an implied index of zero? I don't like the rc handling, but couldn't come up with anything I liked better. cheers diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 11c1d06..60fd0aa 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -124,15 +124,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) { unsigned long addr; - const __be32 *basep, *sizep; + u32 base, size; unsigned int rtas_start = 0, rtas_end = 0; + int rc; - basep = of_get_property(rtas.dev, linux,rtas-base, NULL); - sizep = of_get_property(rtas.dev, rtas-size, NULL); + rc = of_property_read_u32_index(rtas.dev, linux,rtas-base, 0, base); + rc |= of_property_read_u32_index(rtas.dev, rtas-size, 0, size); - if (basep sizep) { - rtas_start = be32_to_cpup(basep); - rtas_end = rtas_start + be32_to_cpup(sizep); + if (rc == 0) { + rtas_start = base; + rtas_end = rtas_start + size; } for (addr = begin; addr end; addr += PAGE_SIZE) { ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2] powerpc/powernv: Framework to log critical errors on powernv.
Hi Micheal, Thanks for the review. On 12/18/2013 08:13 AM, Michael Ellerman wrote: On Mon, 2013-12-16 at 18:00 +0530, Deepthi Dharwar wrote: This patch provides error logging interfaces to report critical powernv error to FSP. All the required information to dump the error is collected at POWERNV level through error log interfaces and then pushed on to FSP. This also supports synchronous error logging in cases of PANIC, by passing OPAL_ERROR_PANIC as severity in elog_create() call. Please make note of the fact that none of this code is currently used but will be in a subsequent patch. When can we expect those patches? This patch only adds the framework to log errors. Coming days this framework will be used to report all POWERNV errors in a phased manner. We would ideally be targeting one sub-system at a time and use these interfaces. diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 0f01308..1c5440a 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -168,6 +168,7 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_GET_MSG85 #define OPAL_CHECK_ASYNC_COMPLETION 86 #define OPAL_SYNC_HOST_REBOOT 87 +#define OPAL_ELOG_SEND 88 #ifndef __ASSEMBLY__ @@ -260,6 +261,122 @@ enum OpalMessageType { OPAL_MSG_TYPE_MAX, }; +/* Classification of Error/Events reporting type classification Standard comment style for block comments is: /* * Classification ... */ That applies to almost all of your comments in here. + * Platform Events/Errors: Report Machine Check Interrupt I think these comments would be better inline with the values, eg: /* Report Machine Check Interrupt */ OPAL_PLATFORM, /* Report all I/O related events/errors */ OPAL_INPUT_OUTPUT, etc. Again that applies to most of your comments. Sure, I'll make it inline. + * INPUT_OUTPUT: Report all I/O related events/errors + * RESOURCE_DEALLOC: Hotplug events and errors + * MISC: Miscellanous error + * Field: error_events_type What is this Field: thing about? Field is just to add some readability that these options relate to corresponding elog_create argument field. Looks like the purpose is not getting solved. + */ +enum error_events { If you're going to define an enum you should actually use it in the API, I can't see anywhere you do? If you do want to use an enum it should be opal_error_events. Agree. +OPAL_PLATFORM, +OPAL_INPUT_OUTPUT, +OPAL_RESOURCE_DEALLOC, +OPAL_MISC, +}; +/* OPAL Subsystem IDs listed for reporting events/errors + * Field: subsystem_id + */ + +#define OPAL_PROCESSOR_SUBSYSTEM0x10 +#define OPAL_MEMORY_SUBSYSTEM 0x20 +#define OPAL_IO_SUBSYSTEM 0x30 +#define OPAL_IO_DEVICES 0x40 +#define OPAL_CEC_HARDWARE 0x50 +#define OPAL_POWER_COOLING 0x60 +#define OPAL_MISC_SUBSYSTEM 0x70 +#define OPAL_SURVEILLANCE_ERR 0x7A +#define OPAL_PLATFORM_FIRMWARE 0x80 +#define OPAL_SOFTWARE 0x90 +#define OPAL_EXTERNAL_ENV 0xA0 + +/* During reporting an event/error the following represents + * how serious the logged event/error is. (Severity) + * Field: event_sev + */ +#define OPAL_INFO 0x00 +#define OPAL_RECOVERED_ERR_GENERAL 0x10 + +/* 0x2X series is to denote set of Predictive Error + * 0x20 Generic predictive error + * 0x21 Predictive error, degraded performance + * 0x22 Predictive error, fault may be corrected after reboot + * 0x23 Predictive error, fault may be corrected after reboot, + * degraded performance + * 0x24 Predictive error, loss of redundancy + */ +#define OPAL_PREDICTIVE_ERR_GENERAL 0x20 +#define OPAL_PREDICTIVE_ERR_DEGRADED_PERF 0x21 +#define OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT0x22 +#define OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_BOOT_DEGRADE_PERF 0x23 +#define OPAL_PREDICTIVE_ERR_LOSS_OF_REDUNDANCY 0x24 + +/* 0x4X series for Unrecoverable Error + * 0x40 Generic Unrecoverable error + * 0x41 Unrecoverable error bypassed with degraded performance + * 0x44 Unrecoverable error bypassed with loss of redundancy + * 0x45 Unrecoverable error bypassed with loss of redundancy and performance + * 0x48 Unrecoverable error bypassed with loss of function + */ +#define OPAL_UNRECOVERABLE_ERR_GENERAL 0x40 +#define OPAL_UNRECOVERABLE_ERR_DEGRADE_PERF 0x41 +#define OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY 0x44 +#define OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY_PERF 0x45 +#define OPAL_UNRECOVERABLE_ERR_LOSS_OF_FUNCTION 0x48 +#define OPAL_ERROR_PANIC
Re: [PATCH v2] powerpc/powernv: Framework to log critical errors on powernv.
On Wed, 2013-12-18 at 10:48 +0530, Deepthi Dharwar wrote: Hi Micheal, Thanks for the review. No worries. On 12/18/2013 08:13 AM, Michael Ellerman wrote: On Mon, 2013-12-16 at 18:00 +0530, Deepthi Dharwar wrote: +/* All the information regarding an error/event to be reported + * needs to populate this structure using pre-defined interfaces + * only + */ +struct opal_errorlog { + + uint16_t component_id; + uint8_t error_events_type:3; Bit field? + uint8_t subsystem_id; + + uint8_t event_sev; + uint8_t event_subtype; + uint8_t usr_scn_count; /* User section count */ user_section_count; + uint8_t elog_origin; + + uint32_t usr_scn_size; /* User section size */ user_section_size; + uint32_t reason_code; + uint32_t additional_info[4]; + + char usr_data_dump[OPAL_LOG_MAX_DUMP]; +}; It looks like this goes straight to Opal, should we be using __packed ? Yes, this goes straight into Opal. The structure is defined such that it is packed by default, this will not require compiler to pack bytes. Sure, but the compiler might decide to lay it out differently for some reason. You should use __packed. Also bitfields are essentially a big implementation defined behaviour sign, so I would avoid using the bitfield. diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 58849d0..ade1e58 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -22,7 +23,9 @@ /* Maximum size of a single log on FSP is 16KB */ #define OPAL_MAX_ERRLOG_SIZE 16384 -/* maximu number of records powernv can hold */ +#define USR_CHAR_ARRAY_FIXED_SIZE 4 What is this? struct User data section is mapped to a buffer. As all the structures are padded, we need to subtract the same to do data manipulation. Make me need to re-word it or use __packed here. Yeah that's still not really clear to me, so if you can do something that is more obvious that would be good. @@ -272,6 +275,61 @@ static int init_err_log_buffer(void) return 0; } +/* Interface to be used by POWERNV to push the logs to FSP via Sapphire */ +struct opal_errorlog *elog_create(uint8_t err_evt_type, uint16_t component_id, + uint8_t subsystem_id, uint8_t event_sev, uint8_t event_subtype, + uint32_t reason_code, uint32_t info0, uint32_t info1, + uint32_t info2, uint32_t info3) A call to this function is going to be just a giant list of integer values, it will not be easy to see at a glance which value goes in which field. I think you'd be better off with an elog_alloc() routine, and then you just do the initialisation explicitly so that it's obvious which value goes where: elog-error_events_type = FOO; elog-component_id = BAR; elog-subsystem_id = ETC; elog_create() will be called by all sub-systems on POWERNV platform to log events and errors. I feel we are better off passing all the required arguments to the interface than initialize explicitly. This would have a cleaner interface to error logging by 1) Removing huge amount of code duplication ( Each and every error/event to be reported needs to initialise fields of the opal_errorlog struct done many many times on POWERNV, results in redundant code ) It will be more lines of code, but it might be more readable code. 2) There are chances of missing out on initialising key fields if done by the user. Having an interface mandates the fields that needs to populated while logging error/events. I can always pass 0 :) We will see how it looks once there are some callers. +void commit_errorlog_to_fsp(struct opal_errorlog *buf) +{ + opal_commit_log_to_fsp((void *)(vmalloc_to_pfn(buf) PAGE_SHIFT)); Can't fail? It is better to have a return here, atleast for the caller to know if opal handling of the same is successful or not. I will make the required change. + kfree(buf); It's a bit rude to free buf when the caller still has a pointer to it. Technically, after the error log has been committed, the user is not supposed to re-use or do anything with that buffer. I need to add checks in all my routines if(buf != NULL), to handle the case where the user by mistake is trying to use the same buffer pointer. Why is the user not supposed to re-use it? kfree()'ing the buffer doesn't prevent the caller from re-using it. cheers ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2] powerpc/powernv: Framework to log critical errors on powernv.
On 12/18/2013 10:57 AM, Michael Ellerman wrote: On Wed, 2013-12-18 at 10:48 +0530, Deepthi Dharwar wrote: Hi Micheal, Thanks for the review. No worries. On 12/18/2013 08:13 AM, Michael Ellerman wrote: On Mon, 2013-12-16 at 18:00 +0530, Deepthi Dharwar wrote: +/* All the information regarding an error/event to be reported + * needs to populate this structure using pre-defined interfaces + * only + */ +struct opal_errorlog { + + uint16_t component_id; + uint8_t error_events_type:3; Bit field? + uint8_t subsystem_id; + + uint8_t event_sev; + uint8_t event_subtype; + uint8_t usr_scn_count; /* User section count */ user_section_count; + uint8_t elog_origin; + + uint32_t usr_scn_size; /* User section size */ user_section_size; + uint32_t reason_code; + uint32_t additional_info[4]; + + char usr_data_dump[OPAL_LOG_MAX_DUMP]; +}; It looks like this goes straight to Opal, should we be using __packed ? Yes, this goes straight into Opal. The structure is defined such that it is packed by default, this will not require compiler to pack bytes. Sure, but the compiler might decide to lay it out differently for some reason. You should use __packed. Ok. Also bitfields are essentially a big implementation defined behaviour sign, so I would avoid using the bitfield. Yes, bitfields will be gone. diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 58849d0..ade1e58 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -22,7 +23,9 @@ /* Maximum size of a single log on FSP is 16KB */ #define OPAL_MAX_ERRLOG_SIZE 16384 -/* maximu number of records powernv can hold */ +#define USR_CHAR_ARRAY_FIXED_SIZE 4 What is this? struct User data section is mapped to a buffer. As all the structures are padded, we need to subtract the same to do data manipulation. Make me need to re-word it or use __packed here. Yeah that's still not really clear to me, so if you can do something that is more obvious that would be good. Sure. @@ -272,6 +275,61 @@ static int init_err_log_buffer(void) return 0; } +/* Interface to be used by POWERNV to push the logs to FSP via Sapphire */ +struct opal_errorlog *elog_create(uint8_t err_evt_type, uint16_t component_id, + uint8_t subsystem_id, uint8_t event_sev, uint8_t event_subtype, + uint32_t reason_code, uint32_t info0, uint32_t info1, + uint32_t info2, uint32_t info3) A call to this function is going to be just a giant list of integer values, it will not be easy to see at a glance which value goes in which field. I think you'd be better off with an elog_alloc() routine, and then you just do the initialisation explicitly so that it's obvious which value goes where: elog-error_events_type = FOO; elog-component_id = BAR; elog-subsystem_id = ETC; elog_create() will be called by all sub-systems on POWERNV platform to log events and errors. I feel we are better off passing all the required arguments to the interface than initialize explicitly. This would have a cleaner interface to error logging by 1) Removing huge amount of code duplication ( Each and every error/event to be reported needs to initialise fields of the opal_errorlog struct done many many times on POWERNV, results in redundant code ) It will be more lines of code, but it might be more readable code. 2) There are chances of missing out on initialising key fields if done by the user. Having an interface mandates the fields that needs to populated while logging error/events. I can always pass 0 :) I was referring to more on the lines of missing unintentionally :) We will see how it looks once there are some callers. Sure. I will retain it for now. Going forward once we start adding users exploiting this interface, we can then take a call. +void commit_errorlog_to_fsp(struct opal_errorlog *buf) +{ + opal_commit_log_to_fsp((void *)(vmalloc_to_pfn(buf) PAGE_SHIFT)); Can't fail? It is better to have a return here, atleast for the caller to know if opal handling of the same is successful or not. I will make the required change. + kfree(buf); It's a bit rude to free buf when the caller still has a pointer to it. Technically, after the error log has been committed, the user is not supposed to re-use or do anything with that buffer. I need to add checks in all my routines if(buf != NULL), to handle the case where the user by mistake is trying to use the same buffer pointer. Why is the user not supposed to re-use it? kfree()'ing the buffer doesn't prevent the caller from re-using it. Release the memory. Assign pointer to NULL before returning. All the error logging interfaces should have NULL check (to return). User can't do much in that case. Regards, Deepthi cheers ___
Re: [PATCH] powerpc: book3s: kvm: Don't abuse host r2 in exit path
Hi Alex, Any update on this ? We need this to got into 3.13. -aneesh Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com writes: From: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com We don't use PACATOC for PR. Avoid updating HOST_R2 with PR KVM mode when both HV and PR are enabled in the kernel. Without this we get the below crash (qemu) Unable to handle kernel paging request for data at address 0x8310 Faulting instruction address: 0xc001d5a4 cpu 0x2: Vector: 300 (Data Access) at [c001dc53aef0] pc: c001d5a4: .vtime_delta.isra.1+0x34/0x1d0 lr: c001d760: .vtime_account_system+0x20/0x60 sp: c001dc53b170 msr: 80009032 dar: 8310 dsisr: 4000 current = 0xc001d76c62d0 paca= 0xcfef1100 softe: 0irq_happened: 0x01 pid = 4472, comm = qemu-system-ppc enter ? for help [c001dc53b200] c001d760 .vtime_account_system+0x20/0x60 [c001dc53b290] c008d050 .kvmppc_handle_exit_pr+0x60/0xa50 [c001dc53b340] c008f51c kvm_start_lightweight+0xb4/0xc4 [c001dc53b510] c008cdf0 .kvmppc_vcpu_run_pr+0x150/0x2e0 [c001dc53b9e0] c008341c .kvmppc_vcpu_run+0x2c/0x40 [c001dc53ba50] c0080af4 .kvm_arch_vcpu_ioctl_run+0x54/0x1b0 [c001dc53bae0] c007b4c8 .kvm_vcpu_ioctl+0x478/0x730 [c001dc53bca0] c02140cc .do_vfs_ioctl+0x4ac/0x770 [c001dc53bd80] c02143e8 .SyS_ioctl+0x58/0xb0 [c001dc53be30] c0009e58 syscall_exit+0x0/0x98 --- Exception: c00 (System Call) at 1f960160 SP (1ecbe3c0) is in userspace These changes were originally part of http://mid.gmane.org/20130806042205.gr19...@iris.ozlabs.ibm.com Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com --- arch/powerpc/include/asm/kvm_book3s_asm.h | 1 + arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/book3s_hv_rmhandlers.S | 7 +++ 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index 0bd9348..69fe837 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -79,6 +79,7 @@ struct kvmppc_host_state { ulong vmhandler; ulong scratch0; ulong scratch1; + ulong scratch2; u8 in_guest; u8 restore_hid5; u8 napping; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 8e6ede6..841a4c8 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -583,6 +583,7 @@ int main(void) HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler); HSTATE_FIELD(HSTATE_SCRATCH0, scratch0); HSTATE_FIELD(HSTATE_SCRATCH1, scratch1); + HSTATE_FIELD(HSTATE_SCRATCH2, scratch2); HSTATE_FIELD(HSTATE_IN_GUEST, in_guest); HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5); HSTATE_FIELD(HSTATE_NAPPING, napping); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 339aa5e..16f7654 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -750,15 +750,14 @@ kvmppc_interrupt_hv: * guest CR, R12 saved in shadow VCPU SCRATCH1/0 * guest R13 saved in SPRN_SCRATCH0 */ - /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */ - std r9, HSTATE_HOST_R2(r13) + std r9, HSTATE_SCRATCH2(r13) lbz r9, HSTATE_IN_GUEST(r13) cmpwi r9, KVM_GUEST_MODE_HOST_HV beq kvmppc_bad_host_intr #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE cmpwi r9, KVM_GUEST_MODE_GUEST - ld r9, HSTATE_HOST_R2(r13) + ld r9, HSTATE_SCRATCH2(r13) beq kvmppc_interrupt_pr #endif /* We're now back in the host but in guest MMU context */ @@ -778,7 +777,7 @@ kvmppc_interrupt_hv: std r6, VCPU_GPR(R6)(r9) std r7, VCPU_GPR(R7)(r9) std r8, VCPU_GPR(R8)(r9) - ld r0, HSTATE_HOST_R2(r13) + ld r0, HSTATE_SCRATCH2(r13) std r0, VCPU_GPR(R9)(r9) std r10, VCPU_GPR(R10)(r9) std r11, VCPU_GPR(R11)(r9) -- 1.8.3.2 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev