[PATCH v6 2/4] powerpc/85xx: add hardware automatically enter altivec idle state

2013-12-17 Thread Dongsheng Wang
From: Wang Dongsheng dongsheng.w...@freescale.com

Each core's AltiVec unit may be placed into a power savings mode
by turning off power to the unit. Core hardware will automatically
power down the AltiVec unit after no AltiVec instructions have
executed in N cycles. The AltiVec power-control is triggered by hardware.

Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com
---
*v3:
Assembly code instead of C code.

*v2:
Remove:
delete setup_idle_hw_governor function.
delete Fix erratum for rev1.

Move:
move setup_* into __setup/restore_cpu_e6500.

 arch/powerpc/kernel/cpu_setup_fsl_booke.S | 21 +
 1 file changed, 21 insertions(+)

diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S 
b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index bfb18c7..4789056 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -53,11 +53,31 @@ _GLOBAL(__e500_dcache_setup)
isync
blr
 
+/*
+ * FIXME - we haven't yet done testing to determine a reasonable default
+ * value for AV_WAIT_IDLE_BIT.
+ */
+#define AV_WAIT_IDLE_BIT   50 /* 1ms, TB frequency is 41.66MHZ */
+_GLOBAL(setup_altivec_idle)
+   mfspr   r3, SPRN_PWRMGTCR0
+
+   /* Enable Altivec Idle */
+   orisr3, r3, PWRMGTCR0_AV_IDLE_PD_EN@h
+   li  r11, AV_WAIT_IDLE_BIT
+
+   /* Set Automatic AltiVec Idle Count */
+   rlwimi  r3, r11, PWRMGTCR0_AV_IDLE_CNT_SHIFT, PWRMGTCR0_AV_IDLE_CNT
+
+   mtspr   SPRN_PWRMGTCR0, r3
+
+   blr
+
 _GLOBAL(__setup_cpu_e6500)
mflrr6
 #ifdef CONFIG_PPC64
bl  .setup_altivec_ivors
 #endif
+   bl  setup_altivec_idle
bl  __setup_cpu_e5500
mtlrr6
blr
@@ -119,6 +139,7 @@ _GLOBAL(__setup_cpu_e5500)
 _GLOBAL(__restore_cpu_e6500)
mflrr5
bl  .setup_altivec_ivors
+   bl  .setup_altivec_idle
bl  __restore_cpu_e5500
mtlrr5
blr
-- 
1.8.0


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v6 3/4] powerpc/85xx: add hardware automatically enter pw20 state

2013-12-17 Thread Dongsheng Wang
From: Wang Dongsheng dongsheng.w...@freescale.com

Using hardware features make core automatically enter PW20 state.
Set a TB count to hardware, the effective count begins when PW10
is entered. When the effective period has expired, the core will
proceed from PW10 to PW20 if no exit conditions have occurred during
the period.

Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com
---
*v3:
Assembly code instead of C code.

*v2:
Remove:
delete setup_idle_hw_governor function.
delete Fix erratum for rev1.

Move:
move setup_* into __setup/restore_cpu_e6500.

 arch/powerpc/kernel/cpu_setup_fsl_booke.S | 21 +
 1 file changed, 21 insertions(+)

diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S 
b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index 4789056..49e738e 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -55,6 +55,25 @@ _GLOBAL(__e500_dcache_setup)
 
 /*
  * FIXME - we haven't yet done testing to determine a reasonable default
+ * value for PW20_WAIT_IDLE_BIT.
+ */
+#define PW20_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */
+_GLOBAL(setup_pw20_idle)
+   mfspr   r3, SPRN_PWRMGTCR0
+
+   /* Set PW20_WAIT bit, enable pw20 state*/
+   ori r3, r3, PWRMGTCR0_PW20_WAIT
+   li  r11, PW20_WAIT_IDLE_BIT
+
+   /* Set Automatic PW20 Core Idle Count */
+   rlwimi  r3, r11, PWRMGTCR0_PW20_ENT_SHIFT, PWRMGTCR0_PW20_ENT
+
+   mtspr   SPRN_PWRMGTCR0, r3
+
+   blr
+
+/*
+ * FIXME - we haven't yet done testing to determine a reasonable default
  * value for AV_WAIT_IDLE_BIT.
  */
 #define AV_WAIT_IDLE_BIT   50 /* 1ms, TB frequency is 41.66MHZ */
@@ -77,6 +96,7 @@ _GLOBAL(__setup_cpu_e6500)
 #ifdef CONFIG_PPC64
bl  .setup_altivec_ivors
 #endif
+   bl  setup_pw20_idle
bl  setup_altivec_idle
bl  __setup_cpu_e5500
mtlrr6
@@ -139,6 +159,7 @@ _GLOBAL(__setup_cpu_e5500)
 _GLOBAL(__restore_cpu_e6500)
mflrr5
bl  .setup_altivec_ivors
+   bl  .setup_pw20_idle
bl  .setup_altivec_idle
bl  __restore_cpu_e5500
mtlrr5
-- 
1.8.0


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v6 4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle

2013-12-17 Thread Dongsheng Wang
From: Wang Dongsheng dongsheng.w...@freescale.com

Add a sys interface to enable/diable pw20 state or altivec idle, and
control the wait entry time.

Enable/Disable interface:
0, disable. 1, enable.
/sys/devices/system/cpu/cpuX/pw20_state
/sys/devices/system/cpu/cpuX/altivec_idle

Set wait time interface:(Nanosecond)
/sys/devices/system/cpu/cpuX/pw20_wait_time
/sys/devices/system/cpu/cpuX/altivec_idle_wait_time
Example: Base on TBfreq is 41MHZ.
1~48(ns): TB[63]
49~97(ns): TB[62]
98~195(ns): TB[61]
196~390(ns): TB[60]
391~780(ns): TB[59]
781~1560(ns): TB[58]
...

Signed-off-by: Wang Dongsheng dongsheng.w...@freescale.com
---
*v6:
Change show_pw20_wait_time/show_altivec_idle_wait_time functions implementation.

*v5:
Change get_idle_ticks_bit function implementation.

*v4:
Move code from 85xx/common.c to kernel/sysfs.c.

Remove has_pw20_altivec_idle function.

Change wait entry_bit to wait time.

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index b4e6676..6c92e23 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -86,6 +86,304 @@ __setup(smt-snooze-delay=, setup_smt_snooze_delay);
 
 #endif /* CONFIG_PPC64 */
 
+#ifdef CONFIG_FSL_SOC
+#define MAX_BIT63
+
+static u64 pw20_wt;
+static u64 altivec_idle_wt;
+
+static unsigned int get_idle_ticks_bit(u64 ns)
+{
+   u64 cycle;
+
+   if (ns = 1)
+   cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec;
+   else
+   cycle = div_u64(ns * tb_ticks_per_usec, 1000);
+
+   if (!cycle)
+   return 0;
+
+   return ilog2(cycle);
+}
+
+static void do_show_pwrmgtcr0(void *val)
+{
+   u32 *value = val;
+
+   *value = mfspr(SPRN_PWRMGTCR0);
+}
+
+static ssize_t show_pw20_state(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   u32 value;
+   unsigned int cpu = dev-id;
+
+   smp_call_function_single(cpu, do_show_pwrmgtcr0, value, 1);
+
+   value = PWRMGTCR0_PW20_WAIT;
+
+   return sprintf(buf, %u\n, value ? 1 : 0);
+}
+
+static void do_store_pw20_state(void *val)
+{
+   u32 *value = val;
+   u32 pw20_state;
+
+   pw20_state = mfspr(SPRN_PWRMGTCR0);
+
+   if (*value)
+   pw20_state |= PWRMGTCR0_PW20_WAIT;
+   else
+   pw20_state = ~PWRMGTCR0_PW20_WAIT;
+
+   mtspr(SPRN_PWRMGTCR0, pw20_state);
+}
+
+static ssize_t store_pw20_state(struct device *dev,
+   struct device_attribute *attr,
+   const char *buf, size_t count)
+{
+   u32 value;
+   unsigned int cpu = dev-id;
+
+   if (kstrtou32(buf, 0, value))
+   return -EINVAL;
+
+   if (value  1)
+   return -EINVAL;
+
+   smp_call_function_single(cpu, do_store_pw20_state, value, 1);
+
+   return count;
+}
+
+static ssize_t show_pw20_wait_time(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   u32 value;
+   u64 tb_cycle = 1;
+   u64 time;
+
+   unsigned int cpu = dev-id;
+
+   if (!pw20_wt) {
+   smp_call_function_single(cpu, do_show_pwrmgtcr0, value, 1);
+   value = (value  PWRMGTCR0_PW20_ENT) 
+   PWRMGTCR0_PW20_ENT_SHIFT;
+
+   tb_cycle = (tb_cycle  (MAX_BIT - value + 1));
+   /* convert ms to ns */
+   if (tb_ticks_per_usec  1000) {
+   time = div_u64(tb_cycle, tb_ticks_per_usec / 1000);
+   } else {
+   u32 rem_us;
+
+   time = div_u64_rem(tb_cycle, tb_ticks_per_usec,
+   rem_us);
+   time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec;
+   }
+   } else {
+   time = pw20_wt;
+   }
+
+   return sprintf(buf, %llu\n, time  0 ? time : 0);
+}
+
+static void set_pw20_wait_entry_bit(void *val)
+{
+   u32 *value = val;
+   u32 pw20_idle;
+
+   pw20_idle = mfspr(SPRN_PWRMGTCR0);
+
+   /* Set Automatic PW20 Core Idle Count */
+   /* clear count */
+   pw20_idle = ~PWRMGTCR0_PW20_ENT;
+
+   /* set count */
+   pw20_idle |= ((MAX_BIT - *value)  PWRMGTCR0_PW20_ENT_SHIFT);
+
+   mtspr(SPRN_PWRMGTCR0, pw20_idle);
+}
+
+static ssize_t store_pw20_wait_time(struct device *dev,
+   struct device_attribute *attr,
+   const char *buf, size_t count)
+{
+   u32 entry_bit;
+   u64 value;
+
+   unsigned int cpu = dev-id;
+
+   if (kstrtou64(buf, 0, value))
+   return -EINVAL;
+
+   if (!value)
+   return -EINVAL;
+
+   entry_bit = get_idle_ticks_bit(value);
+   if (entry_bit  MAX_BIT)
+   return -EINVAL;
+
+   pw20_wt = value;
+
+   

Re: [PATCH] DTS: DMA: Fix DMA3 interrupts

2013-12-17 Thread Li Yang
On Wed, Dec 11, 2013 at 2:33 AM, Scott Wood scottw...@freescale.com wrote:
 On Tue, 2013-12-10 at 18:33 +0800, Hongbo Zhang wrote:
 Scott,
 This issue is due to the non-continuous MPIC register, I think there is
 two ways to fix it.

 The first one is as what we are discussing, in fact the Bman/Qman DT
 author had introduced this way, and I had to follow it, it is a trick,
 adding 208 is a bit ugly I think, and even difficult to explain it to
 customers etc, but this way changes less codes.

 The second one is editing MPIC related codes without adding 208 to high
 interrupts. The point of translate interrupt number to MPIC register
 address is a so called 'isu' mechanism, we can do like the following
 example codes, then the tricky adding 208 isn't needed any more.

 Which one do you prefer?
 In fact I myself prefer the second, if the idea is acceptable, I will
 send a patch instead of this one. (and also alone with the internal
 patch decreasing 208 for the Bman/Qman)

 void __init corenet_ds_pic_init(void)
 {
  ..

  mpic = mpic_alloc(NULL, 0, flags, 0, 512, OpenPIC);
  BUG_ON(mpic == NULL);

 // Add this start
  for (i = 0; i  17; i++) {
  if (i  11)
  addr_off = 0x1 + 0x20 * 16 * i;
  else
  addr_off = 0x13000 + 0x20 * 16 * (i - 11);  /* scape the
 address not for interrupts */
  mpic_assign_isu(mpic, i, mpic-paddr + addr_off);
  }
 // Add this end

  mpic_init(mpic);
 }

 NACK

 We already have a binding that states that the interrupt number is based
 on the register offset, rather than whatever arbitrary numbers hardware
 documenters decide to use next week.

 While I'm not terribly happy with the usability of this, especially now
 that it's not a simple add 16, redefining the existing binding is not
 OK (and in any case the code above seems obfuscatory).  If we decide to
 do something other than continue with register offset divided by 32,
 then we need to define a new interrupt type (similar to current defined
 types of error interrupt, timer, and IPI) for the new numberspace -- and
 it should be handled when decoding that type of interrupt specifier,
 rather than with the isu mechanism.

I had to say that the current binding is terribly confusing.  I know a
lot of people who were confused while looking into the device tree and
I had to help them out now and then.  I even got confused for some
time at the very beginning.  :(  If we want to keep the current
binding, a big warning message is well deserved at the very beginning
of the binding document to clarify that the interrupt number used in
device tree has nothing to do with the interrupt number mentioned in
the silicon reference manuals.  I think we should even mention the
add 16 magic rule also to make it more intuitive.

Adding a new interrupt type for external interrupts is good to make
the interrupt number consistent with the Reference Manuals.  But doing
that requires changing all the device trees that used the previous
binding, and we need a mechanism of judging which binding a device
tree is complying to.

Hongbo's proposal is not a complete fix for the readability issue.
But it makes the add 16 magic rule continue to work, instead of
making it even worse by introducing the +208 interrupts.  The DMA3
patch is the first time that we add the +208 interrupts in the
device trees of mainline kernel.  It is a good time now to prevent us
making the readability even worse while not breaking things out there.
 ISU is a standard mechanism in MPIC to deal with sparse configuration
register space, and IMO a good fit for this situation.

Regards,
Leo
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Chip Select register with 8bit bus size...

2013-12-17 Thread neorf3k
We have a problem to communicate with a register, CS4, at at 0x1002. In 
U-boot that reg has the value 0x45fab3c1, but when we try to access it we get:  
0x10101010 and we are not able to write too.

With CS3 everything seems ok, we can read and write. CS3 is at: 0x1000.

The main/only differences between cs3 and cs4 are:

Chip Select: Lp_cs3
Bus size: 32 bit
Bus control: 2 wait state R/W ACK disabled
Allocated size 32Kbyte

Chip Select: Lp_cs4
Bus size: 8 bit
Bus control: 2 wait state R/W ACK disabled
Allocated size: 4 KByte

In userspace we use:
/**/
//code from memedit.c
int fd;
fd = open(/dev/mem, O_SYNC | O_RDWR);
mem = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset  
(~4095));
printf(/dev/mem[0x%08x] = 0x%08x, offset, *(unsigned int*)mem[offset  
4095]);

//to write
*((unsigned int *)mem[offset  4095]) = input;
/**/

In our kernel module:
/**/
#define CS4_START0x1002U
#define CS4_STOP 0x1004U
#define CS4_SIZE 0x0002U
#define CS3_START0x1000U
#define CS3_STOP 0x1002U
#define CS3_SIZE 0x0002U

 void __iomem *cs3_ioaddr   = ioremap ((volatile unsigned long)(CS3_START), 
CS3_SIZE);
 printk(We read value at CS3: %x \n\n\n,in_be32(cs3_ioaddr+0x0018));
 out_be32(cs3_ioaddr+0x0018,0x0001);
 printk(We read written value: %x \n\n\n,in_be32(cs3_ioaddr+0x0018));
/**/

Platform is based on mpc5200b CPU and fpga is a Xilinx Virtex4.
Kernel we use: 2.6.33

Thanks again in advance…

Lorenzo
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


commit e38c0a1f breaks powerpc boards with uli1575 chip

2013-12-17 Thread Nikita Yushchenko
Hi

While trying to make freescale p2020ds and  mpc8572ds boards working with 
mainline kernel, I faced that commit 
e38c0a1f (Handle #address-cells  2 specially) breaks things with these boards.

Both these boards have uli1575 chip.
Corresponding part in device tree is something like

uli1575@0 {
reg = 0x0 0x0 0x0 0x0 0x0;
#size-cells = 2;
#address-cells = 3;
ranges = 0x200 0x0 0x8000
  0x200 0x0 0x8000
  0x0 0x2000

  0x100 0x0 0x0
  0x100 0x0 0x0
  0x0 0x1;
isa@1e {
...

I.e. it has #address-cells = 3


With commit e38c0a1f reverted, devices under uli1575 are registered correctly, 
e.g. for rtc

OF: ** translation for device /pcie@ffe09000/pcie@0/uli1575@0/isa@1e/rtc@70 **
OF: bus is isa (na=2, ns=1) on /pcie@ffe09000/pcie@0/uli1575@0/isa@1e
OF: translating address: 0001 0070
OF: parent bus is default (na=3, ns=2) on /pcie@ffe09000/pcie@0/uli1575@0
OF: walking ranges...
OF: ISA map, cp=0, s=1000, da=70
OF: parent translation for: 0100  
OF: with offset: 70
OF: one level translation:   0070
OF: parent bus is pci (na=3, ns=2) on /pcie@ffe09000/pcie@0
OF: walking ranges...
OF: default map, cp=a000, s=2000, da=70
OF: default map, cp=0, s=1, da=70
OF: parent translation for: 0100  
OF: with offset: 70
OF: one level translation: 0100  0070
OF: parent bus is pci (na=3, ns=2) on /pcie@ffe09000
OF: walking ranges...
OF: PCI map, cp=0, s=1, da=70
OF: parent translation for: 0100  
OF: with offset: 70
OF: one level translation: 0100  0070
OF: parent bus is default (na=2, ns=2) on /
OF: walking ranges...
OF: PCI map, cp=0, s=1, da=70
OF: parent translation for:  ffc1
OF: with offset: 70
OF: one level translation:  ffc10070
OF: reached root node

With commit e38c0a1f in place, address translation fails:

OF: ** translation for device /pcie@ffe09000/pcie@0/uli1575@0/isa@1e/rtc@70 **
OF: bus is isa (na=2, ns=1) on /pcie@ffe09000/pcie@0/uli1575@0/isa@1e
OF: translating address: 0001 0070
OF: parent bus is default (na=3, ns=2) on /pcie@ffe09000/pcie@0/uli1575@0
OF: walking ranges...
OF: ISA map, cp=0, s=1000, da=70
OF: parent translation for: 0100  
OF: with offset: 70
OF: one level translation:   0070
OF: parent bus is pci (na=3, ns=2) on /pcie@ffe09000/pcie@0
OF: walking ranges...
OF: default map, cp=a000, s=2000, da=70
OF: default map, cp=0, s=1, da=70
OF: not found !

Either e38c0a1f should be reverted, or uli1575 (and perhaps other similar 
devices) have to be described in device 
trees differently.

Could someone please comment on this?
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 00/10] Kconfig: cleanup SERIO_I8042 dependencies

2013-12-17 Thread Mark Salter
This patch series removes the messy dependencies from SERIO_I8042
by having it depend on one variable (ARCH_MAY_HAVE_PC_SERIO) and
having architectures which need it select that variable in
arch/*/Kconfig.

New architectures are unlikely to need SERIO_I8042, so this avoids
having an ever growing list of architectures to exclude. If an
architecture without i8042 support isn't excluded through the
dependency list for SERIO_I8042 or through explicit disabling in
a config, it will likely panic on boot with something similar to
this (from arm64):

[   27.426181] [ffc000403b1c] i8042_flush+0x88/0x10c
[   27.426251] [ffc00084cc2c] i8042_init+0x58/0xe8
[   27.426320] [ffc80bec] do_one_initcall+0xc4/0x14c
[   27.426404] [ffc000820970] kernel_init_freeable+0x1a4/0x244
[   27.426480] [ffc0005a894c] kernel_init+0x18/0x148
[   27.426561] Code: d2800c82 f2bf7c02 f2dff7e2 f2e2 (39400042) 
[   27.426789] ---[ end trace ac076843cf0f383e ]---
[   27.426875] Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x000b

This is v2 of the patch series. Changes from version 1:

  o Added acks. arm, ia64, and sh are only ones without acks.
  o Moved select of ARCH_MIGHT_HAVE_PC_SERIO to board-specific
Kconfigs for arm and sh.

A tree with these patches is at:

   git://github.com/mosalter/linux.git (serio-i8042-v2 branch)


Mark Salter (10):
  alpha: select ARCH_MIGHT_HAVE_PC_SERIO
  arm: select ARCH_MIGHT_HAVE_PC_SERIO
  ia64: select ARCH_MIGHT_HAVE_PC_SERIO
  mips: select ARCH_MIGHT_HAVE_PC_SERIO
  powerpc: select ARCH_MIGHT_HAVE_PC_SERIO
  sh: select ARCH_MIGHT_HAVE_PC_SERIO for SH_CAYMAN
  sparc: select ARCH_MIGHT_HAVE_PC_SERIO
  unicore32: select ARCH_MIGHT_HAVE_PC_SERIO
  x86: select ARCH_MIGHT_HAVE_PC_SERIO
  Kconfig: cleanup SERIO_I8042 dependencies

 arch/alpha/Kconfig   |  1 +
 arch/arm/mach-footbridge/Kconfig |  1 +
 arch/ia64/Kconfig|  1 +
 arch/mips/Kconfig|  1 +
 arch/powerpc/Kconfig |  1 +
 arch/sh/boards/Kconfig   |  1 +
 arch/sparc/Kconfig   |  1 +
 arch/unicore32/Kconfig   |  1 +
 arch/x86/Kconfig |  1 +
 drivers/input/serio/Kconfig  | 11 ---
 10 files changed, 17 insertions(+), 3 deletions(-)

-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 10/10] Kconfig: cleanup SERIO_I8042 dependencies

2013-12-17 Thread Mark Salter
Remove messy dependencies from SERIO_I8042 by having it depend on one
Kconfig symbol (ARCH_MIGHT_HAVE_PC_SERIO) and having architectures
which need it select ARCH_MIGHT_HAVE_PC_SERIO in arch/*/Kconfig.
New architectures are unlikely to need SERIO_I8042, so this avoids
having an ever growing list of architectures to exclude.

Signed-off-by: Mark Salter msal...@redhat.com
Acked-by: H. Peter Anvin h...@zytor.com
Acked-by: Ralf Baechle r...@linux-mips.org
Acked-by: Benjamin Herrenschmidt b...@kernel.crashing.org
CC: Dmitry Torokhov dmitry.torok...@gmail.com
CC: Richard Henderson r...@twiddle.net
CC: linux-al...@vger.kernel.org
CC: Russell King li...@arm.linux.org.uk
CC: linux-arm-ker...@lists.infradead.org
CC: Tony Luck tony.l...@intel.com
CC: Fenghua Yu fenghua...@intel.com
CC: linux-i...@vger.kernel.org
CC: linux-m...@linux-mips.org
CC: Paul Mackerras pau...@samba.org
CC: linuxppc-dev@lists.ozlabs.org
CC: Paul Mundt let...@linux-sh.org
CC: linux...@vger.kernel.org
CC: David S. Miller da...@davemloft.net
CC: sparcli...@vger.kernel.org
CC: Guan Xuetao g...@mprc.pku.edu.cn
CC: Ingo Molnar mi...@redhat.com
CC: Thomas Gleixner t...@linutronix.de
CC: x...@kernel.org
---
 drivers/input/serio/Kconfig | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/input/serio/Kconfig b/drivers/input/serio/Kconfig
index 8541f94..1f5cec2 100644
--- a/drivers/input/serio/Kconfig
+++ b/drivers/input/serio/Kconfig
@@ -16,14 +16,19 @@ config SERIO
  To compile this driver as a module, choose M here: the
  module will be called serio.
 
+config ARCH_MIGHT_HAVE_PC_SERIO
+   bool
+   help
+ Select this config option from the architecture Kconfig if
+ the architecture might use a PC serio device (i8042) to
+  communicate with keyboard, mouse, etc.
+
 if SERIO
 
 config SERIO_I8042
tristate i8042 PC Keyboard controller
default y
-   depends on !PARISC  (!ARM || FOOTBRIDGE_HOST)  \
-  (!SUPERH || SH_CAYMAN)  !M68K  !BLACKFIN  !S390  \
-  !ARC
+   depends on ARCH_MIGHT_HAVE_PC_SERIO
help
  i8042 is the chip over which the standard AT keyboard and PS/2
  mouse are connected to the computer. If you use these devices,
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH v2 05/10] powerpc: select ARCH_MIGHT_HAVE_PC_SERIO

2013-12-17 Thread Mark Salter
Architectures which might use an i8042 for serial IO to keyboard,
mouse, etc should select ARCH_MIGHT_HAVE_PC_SERIO.

Signed-off-by: Mark Salter msal...@redhat.com
Acked-by: Benjamin Herrenschmidt b...@kernel.crashing.org
CC: Paul Mackerras pau...@samba.org
CC: linuxppc-dev@lists.ozlabs.org
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b44b52c..fb75485 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -86,6 +86,7 @@ config PPC
bool
default y
select ARCH_MIGHT_HAVE_PC_PARPORT
+   select ARCH_MIGHT_HAVE_PC_SERIO
select BINFMT_ELF
select OF
select OF_EARLY_FLATTREE
-- 
1.8.3.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] of/irq: Fix device_node refcount in of_irq_parse_raw()

2013-12-17 Thread Cédric Le Goater
Commit 2361613206e6, of/irq: Refactor interrupt-map parsing changed
the refcount on the device_node causing an error in of_node_put():

ERROR: Bad of_node_put() on /pci@8002000
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.13.0-rc3-dirty #2
Call Trace:
[c0003e403500] [c00144fc] .show_stack+0x7c/0x1f0 (unreliable)
[c0003e4035d0] [c070f250] .dump_stack+0x88/0xb4
[c0003e403650] [c05e8768] .of_node_release+0xd8/0xf0
[c0003e4036e0] [c05eeafc] .of_irq_parse_one+0x10c/0x280
[c0003e4037a0] [c05efd4c] .of_irq_parse_pci+0x3c/0x1d0
[c0003e403840] [c0038240] .pcibios_setup_device+0xa0/0x2e0
[c0003e403910] [c00398f0] .pcibios_setup_bus_devices+0x60/0xd0
[c0003e403990] [c003b3a4] .__of_scan_bus+0x1a4/0x2b0
[c0003e403a80] [c003a62c] .pcibios_scan_phb+0x30c/0x410
[c0003e403b60] [c09fe430] .pcibios_init+0x7c/0xd4

This patch adjusts the refcount in the walk of the interrupt tree.
When a match is found, there is no need to increase the refcount 
on 'out_irq-np' as 'newpar' is already holding a ref. The refcount
balance between 'ipar' and 'newpar' is maintained in the skiplevel:
goto label.

This patch also removes the usage of the device_node variable 'old' 
which seems useless after the latest changes.

Signed-off-by: Cédric Le Goater c...@fr.ibm.com
---

This patch was tested on powerpc, pseries and powernv. This is a 
new area for me so I might have missed a path. Please take a look.

We could now introduce an helper routine to look for #address-cells in
of_irq_parse_raw(). This can be the subject of another patch.

Thanks,

C.

 drivers/of/irq.c |5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 786b0b47fae4..27212402c532 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -165,7 +165,6 @@ int of_irq_parse_raw(const __be32 *addr, struct 
of_phandle_args *out_irq)
if (of_get_property(ipar, interrupt-controller, NULL) !=
NULL) {
pr_debug( - got it !\n);
-   of_node_put(old);
return 0;
}
 
@@ -250,8 +249,7 @@ int of_irq_parse_raw(const __be32 *addr, struct 
of_phandle_args *out_irq)
 * Successfully parsed an interrrupt-map translation; copy new
 * interrupt specifier into the out_irq structure
 */
-   of_node_put(out_irq-np);
-   out_irq-np = of_node_get(newpar);
+   out_irq-np = newpar;
 
match_array = imap - newaddrsize - newintsize;
for (i = 0; i  newintsize; i++)
@@ -268,7 +266,6 @@ int of_irq_parse_raw(const __be32 *addr, struct 
of_phandle_args *out_irq)
}
  fail:
of_node_put(ipar);
-   of_node_put(out_irq-np);
of_node_put(newpar);
 
return -EINVAL;
-- 
1.7.10.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 4/5] powerpc/fsl-booke: Add initial T208x QDS board support

2013-12-17 Thread Scott Wood
On Wed, 2013-12-11 at 19:19 +0800, Shengzhou Liu wrote:
 + boardctrl: board-control@3,0 {
 + #address-cells = 1;
 + #size-cells = 1;
 + compatible = fsl,fpga-qixis;
 + reg = 3 0 0x300;
 + ranges = 0 3 0 0x300;
 + };

Why do you have ranges and #address-cells/#size-cells here?  When would
there ever be a child node?

 + };
 +
 + memory {
 + device_type = memory;
 + };
 +
 + dcsr: dcsr@f {
 + ranges = 0x 0xf 0x 0x01072000;
 + };
 +
 + soc: soc@ffe00 {
 + ranges = 0x 0xf 0xfe00 0x100;
 + reg = 0xf 0xfe00 0 0x1000;
 + spi@11 {
 + flash@0 {
 + #address-cells = 1;
 + #size-cells = 1;
 + compatible = spansion,s25sl12801;
 + reg = 0;
 + spi-max-frequency = 4000; /* input clock 
 */
 + partition@u-boot {
 + label = SPI U-Boot;
 + reg = 0x 0x0010;
 + read-only;
 + };
 + partition@kernel {
 + label = SPI Kernel;
 + reg = 0x0010 0x0050;
 + read-only;
 + };
 + partition@dtb {
 + label = SPI DTB;
 + reg = 0x0060 0x0010;
 + read-only;
 + };
 + partition@fs {
 + label = SPI File System;
 + reg = 0x0070 0x0090;
 + };

These are not valid unit addresses -- and the kernel/dtb should not be
read-only.  Please consider fixing where you copied this from, as well.

Do you really want to dedicate a whole mebibyte to the dtb, given that
the flash is only 16 MiB total?

Actually, let's please just stop putting partitions in the dts.  Either
use mtdparts on the command line, or have U-Boot fill in the partition
info (there is code in U-Boot to do this based on the mtdparts env
variable).

 diff --git a/arch/powerpc/include/asm/mpc85xx.h 
 b/arch/powerpc/include/asm/mpc85xx.h
 index 736d4ac..3bef74a 100644
 --- a/arch/powerpc/include/asm/mpc85xx.h
 +++ b/arch/powerpc/include/asm/mpc85xx.h
 @@ -77,6 +77,8 @@
  #define SVR_T10200x852100
  #define SVR_T10210x852101
  #define SVR_T10220x852102
 +#define SVR_T20800x853000
 +#define SVR_T20810x853100
  
  #define SVR_8610 0x80A000
  #define SVR_8641 0x809000

Why is this in the board device tree patch?

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] DTS: DMA: Fix DMA3 interrupts

2013-12-17 Thread Scott Wood
On Tue, 2013-12-17 at 16:48 +0800, Li Yang wrote:
 On Wed, Dec 11, 2013 at 2:33 AM, Scott Wood scottw...@freescale.com wrote:
  On Tue, 2013-12-10 at 18:33 +0800, Hongbo Zhang wrote:
  Scott,
  This issue is due to the non-continuous MPIC register, I think there is
  two ways to fix it.
 
  The first one is as what we are discussing, in fact the Bman/Qman DT
  author had introduced this way, and I had to follow it, it is a trick,
  adding 208 is a bit ugly I think, and even difficult to explain it to
  customers etc, but this way changes less codes.
 
  The second one is editing MPIC related codes without adding 208 to high
  interrupts. The point of translate interrupt number to MPIC register
  address is a so called 'isu' mechanism, we can do like the following
  example codes, then the tricky adding 208 isn't needed any more.
 
  Which one do you prefer?
  In fact I myself prefer the second, if the idea is acceptable, I will
  send a patch instead of this one. (and also alone with the internal
  patch decreasing 208 for the Bman/Qman)
 
  void __init corenet_ds_pic_init(void)
  {
   ..
 
   mpic = mpic_alloc(NULL, 0, flags, 0, 512, OpenPIC);
   BUG_ON(mpic == NULL);
 
  // Add this start
   for (i = 0; i  17; i++) {
   if (i  11)
   addr_off = 0x1 + 0x20 * 16 * i;
   else
   addr_off = 0x13000 + 0x20 * 16 * (i - 11);  /* scape the
  address not for interrupts */
   mpic_assign_isu(mpic, i, mpic-paddr + addr_off);
   }
  // Add this end
 
   mpic_init(mpic);
  }
 
  NACK
 
  We already have a binding that states that the interrupt number is based
  on the register offset, rather than whatever arbitrary numbers hardware
  documenters decide to use next week.
 
  While I'm not terribly happy with the usability of this, especially now
  that it's not a simple add 16, redefining the existing binding is not
  OK (and in any case the code above seems obfuscatory).  If we decide to
  do something other than continue with register offset divided by 32,
  then we need to define a new interrupt type (similar to current defined
  types of error interrupt, timer, and IPI) for the new numberspace -- and
  it should be handled when decoding that type of interrupt specifier,
  rather than with the isu mechanism.
 
 I had to say that the current binding is terribly confusing.  I know a
 lot of people who were confused while looking into the device tree and
 I had to help them out now and then.  I even got confused for some
 time at the very beginning.  :(  If we want to keep the current
 binding, a big warning message is well deserved at the very beginning
 of the binding document to clarify that the interrupt number used in
 device tree has nothing to do with the interrupt number mentioned in
 the silicon reference manuals.  I think we should even mention the
 add 16 magic rule also to make it more intuitive.

I suggested some explanatory text in another thread.

 Adding a new interrupt type for external interrupts is good to make
 the interrupt number consistent with the Reference Manuals.  But doing
 that requires changing all the device trees that used the previous
 binding, 

We may want to change them all for clarity, but there's no compatibility
issue.  Old device trees must continue to be supported.

 and we need a mechanism of judging which binding a device
 tree is complying to.

If it uses the new interrupt type number, then it's the new binding.
Otherwise, it's the old.  The binding would only be added to, not
changed.

I'm not convinced it's worth adding at this point, though -- let's add
some helpful text to the binding and see how much that helps.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc: Make 64-bit non-VMX __copy_tofrom_user bi-endian

2013-12-17 Thread Anton Blanchard
From: Paul E. McKenney paul...@linux.vnet.ibm.com

The powerpc 64-bit __copy_tofrom_user() function uses shifts to handle
unaligned invocations.  However, these shifts were designed for
big-endian systems: On little-endian systems, they must shift in the
opposite direction.

This commit relies on the C preprocessor to insert the correct shifts
into the assembly code.

[ This is a rare but nasty LE issue. Most of the time we use the POWER7
optimised __copy_tofrom_user_power7 loop, but when it hits an exception
we fall back to the base __copy_tofrom_user loop. - Anton ]

Signed-off-by: Paul E. McKenney paul...@linux.vnet.ibm.com
Signed-off-by: Anton Blanchard an...@samba.org
---

diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index d73a590..596a285 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -9,6 +9,14 @@
 #include asm/processor.h
 #include asm/ppc_asm.h
 
+#ifdef __BIG_ENDIAN__
+#define sLd sld/* Shift towards low-numbered address. */
+#define sHd srd/* Shift towards high-numbered address. */
+#else
+#define sLd srd/* Shift towards low-numbered address. */
+#define sHd sld/* Shift towards high-numbered address. */
+#endif
+
.align  7
 _GLOBAL(__copy_tofrom_user)
 BEGIN_FTR_SECTION
@@ -118,10 +126,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 
 24:ld  r9,0(r4)/* 3+2n loads, 2+2n stores */
 25:ld  r0,8(r4)
-   sld r6,r9,r10
+   sLd r6,r9,r10
 26:ldu r9,16(r4)
-   srd r7,r0,r11
-   sld r8,r0,r10
+   sHd r7,r0,r11
+   sLd r8,r0,r10
or  r7,r7,r6
blt cr6,79f
 27:ld  r0,8(r4)
@@ -129,35 +137,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 
 28:ld  r0,0(r4)/* 4+2n loads, 3+2n stores */
 29:ldu r9,8(r4)
-   sld r8,r0,r10
+   sLd r8,r0,r10
addir3,r3,-8
blt cr6,5f
 30:ld  r0,8(r4)
-   srd r12,r9,r11
-   sld r6,r9,r10
+   sHd r12,r9,r11
+   sLd r6,r9,r10
 31:ldu r9,16(r4)
or  r12,r8,r12
-   srd r7,r0,r11
-   sld r8,r0,r10
+   sHd r7,r0,r11
+   sLd r8,r0,r10
addir3,r3,16
beq cr6,78f
 
 1: or  r7,r7,r6
 32:ld  r0,8(r4)
 76:std r12,8(r3)
-2: srd r12,r9,r11
-   sld r6,r9,r10
+2: sHd r12,r9,r11
+   sLd r6,r9,r10
 33:ldu r9,16(r4)
or  r12,r8,r12
 77:stdur7,16(r3)
-   srd r7,r0,r11
-   sld r8,r0,r10
+   sHd r7,r0,r11
+   sLd r8,r0,r10
bdnz1b
 
 78:std r12,8(r3)
or  r7,r7,r6
 79:std r7,16(r3)
-5: srd r12,r9,r11
+5: sHd r12,r9,r11
or  r12,r8,r12
 80:std r12,24(r3)
bne 6f
@@ -165,23 +173,38 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blr
 6: cmpwi   cr1,r5,8
addir3,r3,32
-   sld r9,r9,r10
+   sLd r9,r9,r10
ble cr1,7f
 34:ld  r0,8(r4)
-   srd r7,r0,r11
+   sHd r7,r0,r11
or  r9,r7,r9
 7:
bf  cr7*4+1,1f
+#ifdef __BIG_ENDIAN__
rotldi  r9,r9,32
+#endif
 94:stw r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+   rotrdi  r9,r9,32
+#endif
addir3,r3,4
 1: bf  cr7*4+2,2f
+#ifdef __BIG_ENDIAN__
rotldi  r9,r9,16
+#endif
 95:sth r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+   rotrdi  r9,r9,16
+#endif
addir3,r3,2
 2: bf  cr7*4+3,3f
+#ifdef __BIG_ENDIAN__
rotldi  r9,r9,8
+#endif
 96:stb r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+   rotrdi  r9,r9,8
+#endif
 3: li  r3,0
blr
 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2] powerpc 8xx: Loading kernels over 8Mbytes without CONFIG_PIN_TLB

2013-12-17 Thread Scott Wood
On Tue, 2013-12-17 at 06:54 +0100, leroy christophe wrote:
 Le 16/12/2013 23:57, Scott Wood a écrit :
  On Wed, 2013-12-11 at 00:36 +0100, leroy christophe wrote:
  Le 11/12/2013 00:18, Scott Wood a écrit :
  There wasn't previously an ifdef specifically around the setting of
  SPRN_MD_CTR.  That's new.  There was an ifdef around the entire block,
  which has gone away because you are now trying to map more than 8M
  regardless of CONFIG_PIN_TLB, but that has nothing to do with whether
  there should be an ifdef around SPRN_MD_CTR.
 
 
  Euh, ok, but then we have to fix it in the whole function, not only in
  this block. Do you think it is worth doing it ?
  Fix what in the whole function?  I was asking what harm there would be
  if you just remove all the CONFIG_PIN_TLB ifdefs except around the
  actual RSV4I setting -- do we really care what value goes in MD_CTR for
  the non-pinned case, as long as it's different for each entry?
 
 
 MD_CTR is decremented after each entry added.
 However, the function populates entry 28, then 29, then 30, then 31. At 
 the end MD_CTR has then value 30, ready to overide entry 30 then 29 then 
 28 then 27 .
 
 So I will remove all the CONFIG_PIN_TLB, but I'll also have to fix the 
 value set in MD_CTR to start from 31, won't I ?

OK, so the answer is that we rely on autodecrement avoiding entries over
28 in the CONFIG_PIN_TLB case.  Leave the ifdefs, then.

 Do you have any comment/recommendation on my tentative v3 patch where I 
 have tried to implement based on the use of r7 as you recommended ?

I haven't reviewed it yet.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V4 09/10] power8, perf: Change BHRB branch filter configuration

2013-12-17 Thread Michael Ellerman
On Fri, 2013-12-13 at 13:50 +0530, Anshuman Khandual wrote:
 On 12/09/2013 11:51 AM, Michael Ellerman wrote:
  
  As I said in my comments on version 3 which you ignored:
  
  I think it would be clearer if we actually checked for the 
  possibilities we
  allow and let everything else fall through, eg:
  
  Â Â Â Â Â Â Â Â /* Ignore user/kernel/hv bits */
  Â Â Â Â Â Â Â Â branch_sample_type = ~PERF_SAMPLE_BRANCH_PLM_ALL;
  
  Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return 0;
  
  Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL)
  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return POWER8_MMCRA_IFM1;
  Â 
  Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_COND)
  Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return POWER8_MMCRA_IFM3;
  Â Â Â Â Â Â Â Â 
  Â Â Â Â Â Â Â Â return -1;
  
 
 Hey Michael,
 
 This patch only adds support for the PERF_SAMPLE_BRANCH_COND filter, if the
 over all code flow does not clearly suggest that all combinations of any of
 these HW filters are invalid, then we can go with one more patch to clean
 that up before or after this patch but not here in this patch. Finally the
 code section here will look something like this. Does it sound good ?

Better, but not quite.

 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 {
 u64 pmu_bhrb_filter = 0;
 
 /* BHRB and regular PMU events share the same privilege state
  * filter configuration. BHRB is always recorded along with a
  * regular PMU event. As the privilege state filter is handled
  * in the basic PMC configuration of the accompanying regular
  * PMU event, we ignore any separate BHRB specific request.
  */
 
 /* Ignore user, kernel, hv bits */
 branch_sample_type = ~PERF_SAMPLE_BRANCH_PLM_ALL;
 
 if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
 return pmu_bhrb_filter;

return 0;

 
 
 if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL) {
 pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
 return pmu_bhrb_filter;

return POWER8_MMCRA_IFM1;

 }
 
 if (branch_sample_type == PERF_SAMPLE_BRANCH_COND) {
 pmu_bhrb_filter |= POWER8_MMCRA_IFM3;
 return pmu_bhrb_filter;

return POWER8_MMCRA_IFM3;

 }
 
 /* Every thing else is unsupported */
 return -1;
 }

cheers


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc: Fix bad stack check in exception entry

2013-12-17 Thread Michael Ellerman
On Mon, 2013-12-16 at 15:12 +1100, Michael Neuling wrote:
 In EXCEPTION_PROLOG_COMMON() we check to see if the stack pointer (r1)
 is valid when coming from the kernel.  If it's not valid, we die but
 with a nice oops message.
 
 Currently we allocate a stack frame (subtract INT_FRAME_SIZE) before we
 check to see if the stack pointer is negative.  Unfortunately, this
 won't detect a bad stack where r1 is less than INT_FRAME_SIZE.

The key detail being that we *expect* a negative value, because kernel
addresses (0xc000) are negative.

cheers


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH 1/2] power7, perf: Make some new raw event codes available in sysfs

2013-12-17 Thread Michael Ellerman
On Fri, 2013-12-13 at 10:00 +0530, Anshuman Khandual wrote:
 On 12/13/2013 08:20 AM, Michael Ellerman wrote:
  On Wed, 2013-10-16 at 11:22 +0530, Anshuman Khandual wrote:
  This patch adds some more raw event codes into the existing list
  of event codes present in power7-events-list.h file. This tries
  to complete the list of events supported in Power7 and matches
  the raw event list with libpfm4 library.
  
  It's a bit annoying, but you also need to update the ABI document:
 
 What is annoying ? you need to be specific.

It's annoying that we have to update the ABI document.

Documentation/ABI/testing/sysfs-bus-event_source-devices-events
  
 
 The events listed under the following heading are events required to do
 CPI analysis.

No they are not, it doesn't say that anywhere in the file.

It happens that the events in there *now* are the CPI events, but that's because
they are the only ones that have been added.

 Description:POWER-systems specific performance monitoring event
 
 /sys/devices/cpu/events/PM_1PLUS_PPC_CMPL
 /sys/devices/cpu/events/PM_BRU_FIN
 /sys/devices/cpu/events/PM_BR_MPRED
 /sys/devices/cpu/events/PM_CMPLU_STALL

All events that appear in /sys/devices/cpu/events on powerpc should be listed
in the file.

cheers


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[PATCH] powerpc, perf: Configure BHRB filter before enabling PMU interrupts

2013-12-17 Thread Michael Ellerman
From: Anshuman Khandual khand...@linux.vnet.ibm.com

Right now the config_bhrb() PMU specific call happens after
write_mmcr0(), which actually enables the PMU for event counting and
interrupts. So there is a small window of time where the PMU and BHRB
runs without the required HW branch filter (if any) enabled in BHRB.

This can cause some of the branch samples to be collected through BHRB
without any filter applied and hence affects the correctness of
the results. This patch moves the BHRB config function call before
enabling interrupts.

Here are some data points captured via trace prints which depicts how we
could get PMU interrupts with BHRB filter NOT enabled with a standard
perf record command line (asking for branch record information as well).

$ perf record -j any_call ls

Before the patch:-

ls-1962  [003] d...  2065.299590: .perf_event_interrupt: MMCRA: 400
ls-1962  [003] d...  2065.299603: .perf_event_interrupt: MMCRA: 400
...

All the PMU interrupts before this point did not have the requested
HW branch filter enabled in the MMCRA.

ls-1962  [003] d...  2065.299647: .perf_event_interrupt: MMCRA: 4004000
ls-1962  [003] d...  2065.299662: .perf_event_interrupt: MMCRA: 4004000

After the patch:-

ls-1850  [008] d...   190.311828: .perf_event_interrupt: MMCRA: 4004000
ls-1850  [008] d...   190.311848: .perf_event_interrupt: MMCRA: 4004000

All the PMU interrupts have the requested HW BHRB branch filter
enabled in MMCRA.

Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
[mpe: Fixed up whitespace and cleaned up changelog]
Signed-off-by: Michael Ellerman m...@ellerman.id.au
---
 arch/powerpc/perf/core-book3s.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

Ben this is a bug fix so would be nice for 3.13.

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 29b89e8..67cf220 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1147,6 +1147,9 @@ static void power_pmu_enable(struct pmu *pmu)
mmcr0 = ebb_switch_in(ebb, cpuhw-mmcr[0]);
 
mb();
+   if (cpuhw-bhrb_users)
+   ppmu-config_bhrb(cpuhw-bhrb_filter);
+
write_mmcr0(cpuhw, mmcr0);
 
/*
@@ -1158,8 +1161,6 @@ static void power_pmu_enable(struct pmu *pmu)
}
 
  out:
-   if (cpuhw-bhrb_users)
-   ppmu-config_bhrb(cpuhw-bhrb_filter);
 
local_irq_restore(flags);
 }
-- 
1.8.3.2

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2] powerpc/powernv: Framework to log critical errors on powernv.

2013-12-17 Thread Michael Ellerman
On Mon, 2013-12-16 at 18:00 +0530, Deepthi Dharwar wrote:
 This patch provides error logging interfaces to report critical
 powernv error to FSP.
 All the required information to dump the error is collected
 at POWERNV level through error log interfaces
 and then pushed on to FSP.
 
 This also supports synchronous error logging in cases of
 PANIC, by passing OPAL_ERROR_PANIC as severity in
 elog_create() call.

Please make note of the fact that none of this code is currently used but will
be in a subsequent patch. When can we expect those patches?


 diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
 index 0f01308..1c5440a 100644
 --- a/arch/powerpc/include/asm/opal.h
 +++ b/arch/powerpc/include/asm/opal.h
 @@ -168,6 +168,7 @@ extern int opal_enter_rtas(struct rtas_args *args,
  #define OPAL_GET_MSG 85
  #define OPAL_CHECK_ASYNC_COMPLETION  86
  #define OPAL_SYNC_HOST_REBOOT87
 +#define OPAL_ELOG_SEND   88
  
  #ifndef __ASSEMBLY__
  
 @@ -260,6 +261,122 @@ enum OpalMessageType {
   OPAL_MSG_TYPE_MAX,
  };
  
 +/* Classification of Error/Events reporting type classification

Standard comment style for block comments is:

/*
 * Classification ...
 */

That applies to almost all of your comments in here.


 + * Platform Events/Errors: Report Machine Check Interrupt

I think these comments would be better inline with the values, eg:

/* Report Machine Check Interrupt */
OPAL_PLATFORM,

/* Report all I/O related events/errors */
OPAL_INPUT_OUTPUT,

etc.


Again that applies to most of your comments.

 + * INPUT_OUTPUT: Report all I/O related events/errors
 + * RESOURCE_DEALLOC: Hotplug events and errors
 + * MISC: Miscellanous error
 + * Field: error_events_type

What is this Field: thing about?

 + */
 +enum error_events {

If you're going to define an enum you should actually use it in the API, I
can't see anywhere you do?

If you do want to use an enum it should be opal_error_events.

 + OPAL_PLATFORM,
 + OPAL_INPUT_OUTPUT,
 + OPAL_RESOURCE_DEALLOC,
 + OPAL_MISC,
 +};
 +
 +/* OPAL Subsystem IDs listed for reporting events/errors
 + * Field: subsystem_id
 + */
 +
 +#define OPAL_PROCESSOR_SUBSYSTEM0x10
 +#define OPAL_MEMORY_SUBSYSTEM   0x20
 +#define OPAL_IO_SUBSYSTEM   0x30
 +#define OPAL_IO_DEVICES 0x40
 +#define OPAL_CEC_HARDWARE   0x50
 +#define OPAL_POWER_COOLING  0x60
 +#define OPAL_MISC_SUBSYSTEM 0x70
 +#define OPAL_SURVEILLANCE_ERR   0x7A
 +#define OPAL_PLATFORM_FIRMWARE  0x80
 +#define OPAL_SOFTWARE   0x90
 +#define OPAL_EXTERNAL_ENV   0xA0
 +
 +/* During reporting an event/error the following represents
 + * how serious the logged event/error is. (Severity)
 + * Field: event_sev
 + */
 +#define OPAL_INFO   0x00
 +#define OPAL_RECOVERED_ERR_GENERAL  0x10
 +
 +/* 0x2X series is to denote set of Predictive Error
 + * 0x20 Generic predictive error
 + * 0x21 Predictive error, degraded performance
 + * 0x22 Predictive error, fault may be corrected after reboot
 + * 0x23 Predictive error, fault may be corrected after reboot,
 + * degraded performance
 + * 0x24 Predictive error, loss of redundancy
 + */
 +#define OPAL_PREDICTIVE_ERR_GENERAL 0x20
 +#define OPAL_PREDICTIVE_ERR_DEGRADED_PERF   0x21
 +#define OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT0x22
 +#define OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_BOOT_DEGRADE_PERF 0x23
 +#define OPAL_PREDICTIVE_ERR_LOSS_OF_REDUNDANCY  0x24
 +
 +/* 0x4X series for Unrecoverable Error
 + * 0x40 Generic Unrecoverable error
 + * 0x41 Unrecoverable error bypassed with degraded performance
 + * 0x44 Unrecoverable error bypassed with loss of redundancy
 + * 0x45 Unrecoverable error bypassed with loss of redundancy and performance
 + * 0x48 Unrecoverable error bypassed with loss of function
 + */
 +#define OPAL_UNRECOVERABLE_ERR_GENERAL  0x40
 +#define OPAL_UNRECOVERABLE_ERR_DEGRADE_PERF 0x41
 +#define OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY  0x44
 +#define OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY_PERF 0x45
 +#define OPAL_UNRECOVERABLE_ERR_LOSS_OF_FUNCTION 0x48
 +#define OPAL_ERROR_PANIC0x50
 +
 +/* Event Sub-type
 + * This field provides additional information on the non-error
 + * event type
 + * Field: event_subtype
 + */
 +#define OPAL_NA 0x00
 +#define OPAL_MISCELLANEOUS_INFO_ONLY0x01
 +#define OPAL_PREV_REPORTED_ERR_RECTIFIED0x10
 +#define OPAL_SYS_RESOURCES_DECONFIG_BY_USER 0x20
 +#define OPAL_SYS_RESOURCE_DECONFIG_PRIOR_ERR0x21
 +#define OPAL_RESOURCE_DEALLOC_EVENT_NOTIFY

Re: [v6][PATCH 2/5] powerpc/book3e: store crit/mc/dbg exception thread info

2013-12-17 Thread Scott Wood
On Wed, 2013-10-23 at 17:31 +0800, Tiejun Chen wrote:
 We need to store thread info to these exception thread info like something
 we already did for PPC32.
 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/kernel/exceptions-64e.S |   22 +++---
  1 file changed, 19 insertions(+), 3 deletions(-)
 
 diff --git a/arch/powerpc/kernel/exceptions-64e.S 
 b/arch/powerpc/kernel/exceptions-64e.S
 index 68d74b4..a55cf62 100644
 --- a/arch/powerpc/kernel/exceptions-64e.S
 +++ b/arch/powerpc/kernel/exceptions-64e.S
 @@ -36,6 +36,19 @@
   */
  #define  SPECIAL_EXC_FRAME_SIZE  INT_FRAME_SIZE
  
 +/* Now we only store something to exception thread info */
 +#define  EXC_LEVEL_EXCEPTION_PROLOG(type)
 \
 + ld  r14,PACAKSAVE(r13); \
 + CURRENT_THREAD_INFO(r14, r14);  \
 + CURRENT_THREAD_INFO(r15, r1);   \
 + ld  r10,TI_FLAGS(r14);  \
 + std r10,TI_FLAGS(r15);  \
 + ld  r10,TI_PREEMPT(r14);\
 + std r10,TI_PREEMPT(r15);\
 + ld  r10,TI_TASK(r14);   \
 + std r10,TI_TASK(r15);
 +
 +
  /* Exception prolog code for all exceptions */
  #define EXCEPTION_PROLOG(n, intnum, type, addition)  \
   mtspr   SPRN_SPRG_##type##_SCRATCH,r13; /* get spare registers */   \
 @@ -69,19 +82,22 @@
  
  #define CRIT_SET_KSTACK  
 \
   ld  r1,PACA_CRIT_STACK(r13);\
 - subir1,r1,SPECIAL_EXC_FRAME_SIZE;
 + subir1,r1,SPECIAL_EXC_FRAME_SIZE;   \
 + EXC_LEVEL_EXCEPTION_PROLOG(CRIT);
  #define SPRN_CRIT_SRR0   SPRN_CSRR0
  #define SPRN_CRIT_SRR1   SPRN_CSRR1
  
  #define DBG_SET_KSTACK   
 \
   ld  r1,PACA_DBG_STACK(r13); \
 - subir1,r1,SPECIAL_EXC_FRAME_SIZE;
 + subir1,r1,SPECIAL_EXC_FRAME_SIZE;   \
 + EXC_LEVEL_EXCEPTION_PROLOG(DBG);
  #define SPRN_DBG_SRR0SPRN_DSRR0
  #define SPRN_DBG_SRR1SPRN_DSRR1
  
  #define MC_SET_KSTACK
 \
   ld  r1,PACA_MC_STACK(r13);  \
 - subir1,r1,SPECIAL_EXC_FRAME_SIZE;
 + subir1,r1,SPECIAL_EXC_FRAME_SIZE;   \
 + EXC_LEVEL_EXCEPTION_PROLOG(MC);
  #define SPRN_MC_SRR0 SPRN_MCSRR0
  #define SPRN_MC_SRR1 SPRN_MCSRR1
  

We should rename these if they're going to do more than set up a stack.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v6][PATCH 5/5] powerpc/book3e/kgdb: Fix a single stgep case of lazy IRQ

2013-12-17 Thread Scott Wood
On Wed, 2013-10-23 at 17:31 +0800, Tiejun Chen wrote:
 In lazy EE magic, we may have a lazy interrupt occured while
 entering kgdb, but we really don't want to replay that interrupt
 for kgdb, so we have to clear the PACA_IRQ_HARD_DIS force to
 make sure we can exit directly from this debug exception.
 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com

s/stgep/step/ in subject

 ---
  arch/powerpc/kernel/kgdb.c |8 
  1 file changed, 8 insertions(+)
 
 diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
 index 447c14b..9872f58 100644
 --- a/arch/powerpc/kernel/kgdb.c
 +++ b/arch/powerpc/kernel/kgdb.c
 @@ -185,6 +185,14 @@ static int kgdb_singlestep(struct pt_regs *regs)
   /* Restore current_thread_info lastly. */
   memcpy(exception_thread_info, backup_current_thread_info, 
 sizeof *thread_info);
  
 +#ifdef CONFIG_PPC64
 + /*
 +  * Clear the PACA_IRQ_HARD_DIS from the pending mask
 +  * since we are about to exit this directly from debug
 +  * exception without any replay interrupt in lazy EE case.
 +  */
 + local_paca-irq_happened = ~PACA_IRQ_HARD_DIS;
 +#endif
   return 1;
  }
  

What happens to those interrupts you discarded once we get back to a
state when they can be safely replayed?  I don't think just dropping
them is the answer.

I'm not sure what the actual problem is.  I can understand not wanting
kgdb to cause interrupts to appear to run when the interrupted context
has external interrupts disabled, but the replay code in entry_64.S
doesn't run if interrupts are soft-disabled in the context to be
returned to.  What harm does it cause to run the interrupts if we're
returning to an EE=1 context?

Does KGDB enable interrupts in its handler?  If not, how do we even get
into the situation where there are interrupts pending when the
interrupted context has EE soft-enabled (i.e. we went directly from a
context where the interrupt handler should have run, to a hard-disabled
context)?

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 1/8] powerpc/book3e: rename interrupt_end_book3e with __end_interrupts

2013-12-17 Thread Scott Wood
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote:
 We can rename 'interrupt_end_book3e' with '__end_interrupts' then
 book3s/book3e can share this unique label to make sure we can use
 this conveniently.
 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com

What users of this do you plan to share between book3s and book3e?  I'm
not seeing any existing book3s users that are obviously applicable to
book3e -- they mainly involve copying exception vectors which we
shouldn't need to do.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 2/8] powerpc/book3e: support CONFIG_RELOCATABLE

2013-12-17 Thread Scott Wood
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote:
 book3e is different with book3s since 3s includes the exception
 vectors code in head_64.S as it relies on absolute addressing
 which is only possible within this compilation unit. So we have
 to get that label address with got.
 
 And when boot a relocated kernel, we should reset ipvr properly again
 after .relocate.

ivpr?

 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/include/asm/exception-64e.h |   11 +++
  arch/powerpc/kernel/exceptions-64e.S |   18 +-
  arch/powerpc/kernel/head_64.S|   25 +
  3 files changed, 53 insertions(+), 1 deletion(-)
 
 diff --git a/arch/powerpc/include/asm/exception-64e.h 
 b/arch/powerpc/include/asm/exception-64e.h
 index 51fa43e..371a77f 100644
 --- a/arch/powerpc/include/asm/exception-64e.h
 +++ b/arch/powerpc/include/asm/exception-64e.h
 @@ -214,10 +214,21 @@ exc_##label##_book3e:
  #define TLB_MISS_STATS_SAVE_INFO_BOLTED
  #endif
  
 +#ifndef CONFIG_RELOCATABLE

Please use positive logic (ifdef/else, rather than ifndef/else).

  #define SET_IVOR(vector_number, vector_offset)   \
   li  r3,vector_offset@l; \
   ori r3,r3,interrupt_base_book3e@l;  \
   mtspr   SPRN_IVOR##vector_number,r3;
 +#else /* !CONFIG_RELOCATABLE */
 +/* In relocatable case the value of the constant expression 'expr' is only
 + * offset. So instead, we should loads the address of label 'name'.
 + */
 +#define SET_IVOR(vector_number, vector_offset)   \
 + LOAD_REG_ADDR(r3,interrupt_base_book3e);\
 + rlwinm  r3,r3,0,15,0;   \
 + ori r3,r3,vector_offset@l;  \
 + mtspr   SPRN_IVOR##vector_number,r3;
 +#endif /* CONFIG_RELOCATABLE */

Please use the more readable 4-operand version of rlwinm.

Is there a reason why this new code is only used with
CONFIG_RELOCATABLE?  If @got doesn't work without CONFIG_RELOCATABLE,
then the ifdef should be pushed into LOAD_REG_ADDR.

Likewise with other ifdefs on CONFIG_RELOCATABLE.

 -_STATIC(init_core_book3e)
 +_GLOBAL(init_core_book3e)
   /* Establish the interrupt vector base */
 +#ifdef CONFIG_RELOCATABLE
 +/* In relocatable case the value of the constant expression 'expr' is only
 + * offset. So instead, we should loads the address of label 'name'.
 + */
 + tovirt(r2,r2)
 + LOAD_REG_ADDR(r3, interrupt_base_book3e)
 +#else
   LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e)
 +#endif

I'm having a hard time parsing the comment.  Plus, it feels wrong to
decouple tovirt(r2,r2) from the call to relative_toc.

   mtspr   SPRN_IVPR,r3
   sync
   blr
 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
 index b61363d..550f8fb 100644
 --- a/arch/powerpc/kernel/head_64.S
 +++ b/arch/powerpc/kernel/head_64.S
 @@ -414,12 +414,25 @@ _STATIC(__after_prom_start)
   /* process relocations for the final address of the kernel */
   lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
   sldir25,r25,32
 +#if defined(CONFIG_PPC_BOOK3E)
 + tovirt(r26,r26) /* on booke, we already run at 
 PAGE_OFFSET */
 +#endif
   lwz r7,__run_at_load-_stext(r26)
 +#if defined(CONFIG_PPC_BOOK3E)
 + tophys(r26,r26) /* Restore for the remains. */
 +#endif
   cmplwi  cr0,r7,1/* flagged to stay where we are ? */
   bne 1f
   add r25,r25,r26
  1:   mr  r3,r25
   bl  .relocate
 +#if defined(CONFIG_PPC_BOOK3E)
 + /* In relocatable case we always have to load the address of label 
 'name'
 +  * to set IVPR. So after .relocate we have to update IVPR with current
 +  * address of label.
 +  */
 + bl  .init_core_book3e
 +#endif

Maybe this function should be renamed to something ivpr-specific, so
nothing else gets added there.

  #endif
  
  /*
 @@ -447,12 +460,24 @@ _STATIC(__after_prom_start)
   * variable __run_at_load, if it is set the kernel is treated as relocatable
   * kernel, otherwise it will be moved to PHYSICAL_START
   */
 +#if defined(CONFIG_PPC_BOOK3E)
 + tovirt(r26,r26) /* on booke, we already run at 
 PAGE_OFFSET */
 +#endif
   lwz r7,__run_at_load-_stext(r26)
 +#if defined(CONFIG_PPC_BOOK3E)
 + tophys(r26,r26) /* Restore for the remains. */
 +#endif
   cmplwi  cr0,r7,1
   bne 3f
  
 +#ifdef CONFIG_PPC_BOOK3E
 + LOAD_REG_ADDR(r5, __end_interrupts)
 + LOAD_REG_ADDR(r11, _stext)
 + sub r5,r5,r11
 +#else
   /* just copy interrupts */
   LOAD_REG_IMMEDIATE(r5, __end_interrupts - _stext)
 +#endif

Can't we skip the interrupt copying on book3e?  And if not for some
reason, why start at _stext?

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 3/8] book3e/kexec/kdump: enable kexec for kernel

2013-12-17 Thread Scott Wood
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote:
 We need to active KEXEC for book3e and bypass or convert non-book3e stuff
 in kexec coverage.
 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/Kconfig   |2 +-
  arch/powerpc/kernel/machine_kexec_64.c |  148 
 ++--
  arch/powerpc/kernel/misc_64.S  |6 ++
  3 files changed, 89 insertions(+), 67 deletions(-)
 
 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
 index 5374776..d945435 100644
 --- a/arch/powerpc/Kconfig
 +++ b/arch/powerpc/Kconfig
 @@ -357,7 +357,7 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
  
  config KEXEC
   bool kexec system call
 - depends on (PPC_BOOK3S || FSL_BOOKE || (44x  !SMP))
 + depends on (PPC_BOOK3S || FSL_BOOKE || (44x  !SMP)) || PPC_BOOK3E

Please remove the outher parentheses, and especially don't put
PPC_BOOK3E on the outside of them when there's no reason to group the
other items together.

 @@ -367,6 +301,87 @@ void default_machine_kexec(struct kimage *image)
   /* NOTREACHED */
  }
  
 +#ifdef CONFIG_PPC_BOOK3E
 +int default_machine_kexec_prepare(struct kimage *image)
 +{
 + int i;
 + /*
 +  * Since we use the kernel fault handlers and paging code to
 +  * handle the virtual mode, we must make sure no destination
 +  * overlaps kernel static data or bss.
 +  */
 + for (i = 0; i  image-nr_segments; i++)
 + if (image-segment[i].mem  __pa(_end))
 + return -ETXTBSY;
 + return 0;

Factor out this common code rather than duplicate it.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 4/8] book3e/kexec/kdump: create a 1:1 TLB mapping

2013-12-17 Thread Scott Wood
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote:
 book3e have no real MMU mode so we have to create a 1:1 TLB
 mapping to make sure we can access the real physical address.
 And correct something to support this pseudo real mode on book3e.
 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com

Why do we need to be able to directly access physical addresses?

 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
 index f1a7ce7..20cbb98 100644
 --- a/arch/powerpc/kernel/misc_64.S
 +++ b/arch/powerpc/kernel/misc_64.S
 @@ -460,6 +460,49 @@ kexec_flag:
  
 
  #ifdef CONFIG_KEXEC
 +#ifdef CONFIG_PPC_BOOK3E
 +/* BOOK3E have no a real MMU mode so we have to setup the initial TLB
 + * for a core to map v:0 to p:0 as 1:1. This current implementation
 + * assume that 1G is enough for kexec.
 + */
 +#include asm/mmu.h

#includes go at the top of the file.

 +kexec_create_tlb:
 + /* Invalidate all TLBs to avoid any TLB conflict. */
 + PPC_TLBILX_ALL(0,R0)
 + sync
 + isync
 +
 + mfspr   r10,SPRN_TLB1CFG
 + andi.   r10,r10,TLBnCFG_N_ENTRY /* Extract # entries */
 + subir10,r10,1   /* Often its always safe to use last */
 + lis r9,MAS0_TLBSEL(1)@h
 + rlwimi  r9,r10,16,4,15  /* Setup MAS0 = TLBSEL | ESEL(r9) */

Hardcoding TLB1 makes this FSL-specific code, but you've put it in a
non-FSL-specific place.

 +/* Setup a temp mapping v:0 to p:0 as 1:1 and return to it.
 + */
 +#ifdef CONFIG_SMP
 +#define M_IF_SMP MAS2_M
 +#else
 +#define M_IF_SMP 0
 +#endif
 + mtspr   SPRN_MAS0,r9
 +
 + lis r9,(MAS1_VALID|MAS1_IPROT)@h
 + ori r9,r9,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
 + mtspr   SPRN_MAS1,r9

What if the machine has less than 1 GiB of RAM?  We could get
speculative accesses to non-present addresses.

Though it looks like the normal 64-bit init sequence has the same
problem...

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc, perf: Configure BHRB filter before enabling PMU interrupts

2013-12-17 Thread Anshuman Khandual
On 12/18/2013 07:44 AM, Michael Ellerman wrote:
 From: Anshuman Khandual khand...@linux.vnet.ibm.com
 
 Right now the config_bhrb() PMU specific call happens after
 write_mmcr0(), which actually enables the PMU for event counting and
 interrupts. So there is a small window of time where the PMU and BHRB
 runs without the required HW branch filter (if any) enabled in BHRB.
 
 This can cause some of the branch samples to be collected through BHRB
 without any filter applied and hence affects the correctness of
 the results. This patch moves the BHRB config function call before
 enabling interrupts.
 
 Here are some data points captured via trace prints which depicts how we
 could get PMU interrupts with BHRB filter NOT enabled with a standard
 perf record command line (asking for branch record information as well).
 
 $ perf record -j any_call ls
 
 Before the patch:-
 
 ls-1962  [003] d...  2065.299590: .perf_event_interrupt: MMCRA: 
 400
 ls-1962  [003] d...  2065.299603: .perf_event_interrupt: MMCRA: 
 400
 ...
 
 All the PMU interrupts before this point did not have the requested
 HW branch filter enabled in the MMCRA.
 
 ls-1962  [003] d...  2065.299647: .perf_event_interrupt: MMCRA: 
 4004000
 ls-1962  [003] d...  2065.299662: .perf_event_interrupt: MMCRA: 
 4004000
 
 After the patch:-
 
 ls-1850  [008] d...   190.311828: .perf_event_interrupt: MMCRA: 
 4004000
 ls-1850  [008] d...   190.311848: .perf_event_interrupt: MMCRA: 
 4004000
 
 All the PMU interrupts have the requested HW BHRB branch filter
 enabled in MMCRA.
 
 Signed-off-by: Anshuman Khandual khand...@linux.vnet.ibm.com
 [mpe: Fixed up whitespace and cleaned up changelog]

Thanks Michael for cleaning and resending this out.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 5/8] book3e/kexec/kdump: introduce a kexec kernel flag

2013-12-17 Thread Scott Wood
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote:
 We need to introduce a flag to indicate we're already running
 a kexec kernel then we can go proper path. For example, We
 shouldn't access spin_table from the bootloader to up any secondary
 cpu for kexec kernel, and kexec kernel already know how to jump to
 generic_secondary_smp_init.
 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/include/asm/smp.h|1 +
  arch/powerpc/kernel/head_64.S |   10 ++
  arch/powerpc/kernel/misc_64.S |6 ++
  arch/powerpc/platforms/85xx/smp.c |   20 +++-
  4 files changed, 32 insertions(+), 5 deletions(-)
 
 diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
 index ffbaabe..59165a3 100644
 --- a/arch/powerpc/include/asm/smp.h
 +++ b/arch/powerpc/include/asm/smp.h
 @@ -200,6 +200,7 @@ extern void generic_secondary_thread_init(void);
  extern unsigned long __secondary_hold_spinloop;
  extern unsigned long __secondary_hold_acknowledge;
  extern char __secondary_hold;
 +extern unsigned long __run_at_kexec;
  
  extern void __early_start(void);
  #endif /* __ASSEMBLY__ */
 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
 index 7dc56be..0b46c9d 100644
 --- a/arch/powerpc/kernel/head_64.S
 +++ b/arch/powerpc/kernel/head_64.S
 @@ -89,6 +89,10 @@ __secondary_hold_spinloop:
  __secondary_hold_acknowledge:
   .llong  0x0
  
 + .globl  __run_at_kexec
 +__run_at_kexec:
 + .llong  0x0 /* Flag for the secondary kernel from kexec. */
 +

No leading underscores please -- and why does this need to be 64-bit?

  #ifdef CONFIG_RELOCATABLE
   /* This flag is set to 1 by a loader if the kernel should run
* at the loaded address instead of the linked address.  This
 @@ -417,6 +421,12 @@ _STATIC(__after_prom_start)
  #if defined(CONFIG_PPC_BOOK3E)
   tovirt(r26,r26) /* on booke, we already run at 
 PAGE_OFFSET */
  #endif
 +#if defined(CONFIG_KEXEC) || defined(CONFIG_CRASH_DUMP)
 + /* If relocated we need to restore this flag on that relocated address. 
 */
 + ld  r7,__run_at_kexec-_stext(r26)
 + std r7,__run_at_kexec-_stext(r26)
 +#endif
 +
   lwz r7,__run_at_load-_stext(r26)
  #if defined(CONFIG_PPC_BOOK3E)
   tophys(r26,r26) /* Restore for the remains. */
 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
 index 20cbb98..c89aead 100644
 --- a/arch/powerpc/kernel/misc_64.S
 +++ b/arch/powerpc/kernel/misc_64.S
 @@ -619,6 +619,12 @@ _GLOBAL(kexec_sequence)
   bl  .copy_and_flush /* (dest, src, copy limit, start offset) */
  1:   /* assume normal blr return */
  
 + /* notify we're going into kexec kernel for SMP. */
 + LOAD_REG_ADDR(r3,__run_at_kexec)
 + li  r4,1
 + std r4,0(r3)
 + sync
 +
   /* release other cpus to the new kernel secondary start at 0x60 */
   mflrr5
   li  r6,1
 diff --git a/arch/powerpc/platforms/85xx/smp.c 
 b/arch/powerpc/platforms/85xx/smp.c
 index 5ced4f5..14d461b 100644
 --- a/arch/powerpc/platforms/85xx/smp.c
 +++ b/arch/powerpc/platforms/85xx/smp.c
 @@ -150,6 +150,9 @@ static int smp_85xx_kick_cpu(int nr)
   int hw_cpu = get_hard_smp_processor_id(nr);
   int ioremappable;
   int ret = 0;
 +#ifdef CONFIG_PPC64
 + unsigned long *ptr = NULL;
 +#endif

Looks like an unnecessary initialization.

  
   WARN_ON(nr  0 || nr = NR_CPUS);
   WARN_ON(hw_cpu  0 || hw_cpu = NR_CPUS);
 @@ -238,11 +241,18 @@ out:
  #else
   smp_generic_kick_cpu(nr);
  
 - flush_spin_table(spin_table);
 - out_be32(spin_table-pir, hw_cpu);
 - out_be64((u64 *)(spin_table-addr_h),
 -   __pa((u64)*((unsigned long long *)generic_secondary_smp_init)));
 - flush_spin_table(spin_table);
 + ptr  = (unsigned long *)((unsigned long)__run_at_kexec);
 + /* We shouldn't access spin_table from the bootloader to up any
 +  * secondary cpu for kexec kernel, and kexec kernel already
 +  * know how to jump to generic_secondary_smp_init.
 +  */
 + if (!*ptr) {
 + flush_spin_table(spin_table);
 + out_be32(spin_table-pir, hw_cpu);
 + out_be64((u64 *)(spin_table-addr_h),
 +  __pa((u64)*((unsigned long long 
 *)generic_secondary_smp_init)));
 + flush_spin_table(spin_table);
 + }
  #endif

Please use a more descriptive name than ptr.

How is all that different than just:

if (!__run_at_kexec) {
...
}

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 6/8] book3e/kexec/kdump: implement ppc64 kexec specfic

2013-12-17 Thread Scott Wood
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote:
 ppc64 kexec mechanism has a different implementation with ppc32.
 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com

Could you describe the relevant differences?

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 7/8] book3e/kexec/kdump: redefine VIRT_PHYS_OFFSET

2013-12-17 Thread Scott Wood
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote:
 Book3e is always aligned 1GB to create TLB so we should
 use (KERNELBASE - MEMORY_START) as VIRT_PHYS_OFFSET to
 get __pa/__va properly while boot kdump.

What if MEMORY_START - PHYSICAL_START = 1 GiB?

What about the comment that says we can't use MEMORY_START before
parsing the device tree?

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [v3][PATCH 8/8] book3e/kexec/kdump: recover r4 = 0 to create the initial TLB

2013-12-17 Thread Scott Wood
On Tue, 2013-07-09 at 16:03 +0800, Tiejun Chen wrote:
 In commit 96f013f, powerpc/kexec: Add kexec hold support for Book3e
 processors, requires that GPR4 survive the hold process, for IBM Blue
 Gene/Q with with some very strange firmware. But for FSL Book3E, r4 = 1
 to indicate that the initial TLB entry for this core already exists so
 we still should set r4 with 0 to create that initial TLB.
 
 Signed-off-by: Tiejun Chen tiejun.c...@windriver.com
 ---
  arch/powerpc/kernel/head_64.S |4 
  1 file changed, 4 insertions(+)
 
 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
 index 0b46c9d..d546c5e 100644
 --- a/arch/powerpc/kernel/head_64.S
 +++ b/arch/powerpc/kernel/head_64.S
 @@ -127,6 +127,10 @@ __secondary_hold:
   /* Grab our physical cpu number */
   mr  r24,r3
   /* stash r4 for book3e */
 +#ifdef CONFIG_PPC_FSL_BOOK3E
 + /* we need to setup initial TLB entry. */
 + li  r4,0
 +#endif
   mr  r25,r4

This breaks being able to build one kernel that supports both FSL book3e
and IBM book3e.

-Scott


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH V4 09/10] power8, perf: Change BHRB branch filter configuration

2013-12-17 Thread Anshuman Khandual
On 12/18/2013 05:38 AM, Michael Ellerman wrote:
 On Fri, 2013-12-13 at 13:50 +0530, Anshuman Khandual wrote:
 On 12/09/2013 11:51 AM, Michael Ellerman wrote:

 As I said in my comments on version 3 which you ignored:

 I think it would be clearer if we actually checked for the 
 possibilities we
 allow and let everything else fall through, eg:

 Â Â Â Â Â Â Â Â /* Ignore user/kernel/hv bits */
 Â Â Â Â Â Â Â Â branch_sample_type = ~PERF_SAMPLE_BRANCH_PLM_ALL;

 Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
 Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return 0;

 Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL)
 Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return POWER8_MMCRA_IFM1;
 Â 
 Â Â Â Â Â Â Â Â if (branch_sample_type == PERF_SAMPLE_BRANCH_COND)
 Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â return POWER8_MMCRA_IFM3;
 Â Â Â Â Â Â Â Â 
 Â Â Â Â Â Â Â Â return -1;


 Hey Michael,

 This patch only adds support for the PERF_SAMPLE_BRANCH_COND filter, if the
 over all code flow does not clearly suggest that all combinations of any of
 these HW filters are invalid, then we can go with one more patch to clean
 that up before or after this patch but not here in this patch. Finally the
 code section here will look something like this. Does it sound good ?
 
 Better, but not quite.
 
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 {
 u64 pmu_bhrb_filter = 0;

 /* BHRB and regular PMU events share the same privilege state
  * filter configuration. BHRB is always recorded along with a
  * regular PMU event. As the privilege state filter is handled
  * in the basic PMC configuration of the accompanying regular
  * PMU event, we ignore any separate BHRB specific request.
  */

 /* Ignore user, kernel, hv bits */
 branch_sample_type = ~PERF_SAMPLE_BRANCH_PLM_ALL;

 if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY)
 return pmu_bhrb_filter;
 
 return 0;
 


 if (branch_sample_type == PERF_SAMPLE_BRANCH_ANY_CALL) {
 pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
 return pmu_bhrb_filter;
 
 return POWER8_MMCRA_IFM1;
 
 }

 if (branch_sample_type == PERF_SAMPLE_BRANCH_COND) {
 pmu_bhrb_filter |= POWER8_MMCRA_IFM3;
 return pmu_bhrb_filter;
 
 return POWER8_MMCRA_IFM3;
 
 }

 /* Every thing else is unsupported */
 return -1;
 }
 

Okay, will take these changes into another patch before adding conditional 
branch
filter here.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v12] PPC: POWERNV: move iommu_add_device earlier

2013-12-17 Thread Alexey Kardashevskiy
The current implementation of IOMMU on sPAPR does not use iommu_ops
and therefore does not call IOMMU API's bus_set_iommu() which
1) sets iommu_ops for a bus
2) registers a bus notifier
Instead, PCI devices are added to IOMMU groups from
subsys_initcall_sync(tce_iommu_init) which does basically the same
thing without using iommu_ops callbacks.

However Freescale PAMU driver (https://lkml.org/lkml/2013/7/1/158)
implements iommu_ops and when tce_iommu_init is called, every PCI device
is already added to some group so there is a conflict.

This patch does 2 things:
1. removes the loop in which PCI devices were added to groups and
adds explicit iommu_add_device() calls to add devices as soon as they get
the iommu_table pointer assigned to them.
2. moves a bus notifier to powernv code in order to avoid conflict with
the notifier from Freescale driver.

iommu_add_device() and iommu_del_device() are public now.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
Changes:
v12:
* removed redundant bus notifier from common POWERPC code

v11:
* rebased on upstream

v10:
* fixed linker error when IOMMU_API is not enabled

v9:
* removed KVM from the subject as it is not really a KVM patch so
PPC mainainter (hi Ben!) can review/include it into his tree

v8:
* added the check for iommu_group!=NULL before removing device from a group
as suggested by Wei Yang weiy...@linux.vnet.ibm.com

v2:
* added a helper - set_iommu_table_base_and_group - which does
set_iommu_table_base() and iommu_add_device()
---
 arch/powerpc/include/asm/iommu.h| 26 ++
 arch/powerpc/kernel/iommu.c | 41 +++--
 arch/powerpc/platforms/powernv/pci-ioda.c   |  8 +++---
 arch/powerpc/platforms/powernv/pci-p5ioc2.c |  2 +-
 arch/powerpc/platforms/powernv/pci.c| 31 +-
 arch/powerpc/platforms/pseries/iommu.c  |  8 +++---
 6 files changed, 70 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index c34656a..774fa27 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -101,8 +101,34 @@ extern void iommu_free_table(struct iommu_table *tbl, 
const char *node_name);
  */
 extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
int nid);
+#ifdef CONFIG_IOMMU_API
 extern void iommu_register_group(struct iommu_table *tbl,
 int pci_domain_number, unsigned long pe_num);
+extern int iommu_add_device(struct device *dev);
+extern void iommu_del_device(struct device *dev);
+#else
+static inline void iommu_register_group(struct iommu_table *tbl,
+   int pci_domain_number,
+   unsigned long pe_num)
+{
+}
+
+static inline int iommu_add_device(struct device *dev)
+{
+   return 0;
+}
+
+static inline void iommu_del_device(struct device *dev)
+{
+}
+#endif /* !CONFIG_IOMMU_API */
+
+static inline void set_iommu_table_base_and_group(struct device *dev,
+ void *base)
+{
+   set_iommu_table_base(dev, base);
+   iommu_add_device(dev);
+}
 
 extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
struct scatterlist *sglist, int nelems,
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 572bb5b..ecbf468 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1105,7 +1105,7 @@ void iommu_release_ownership(struct iommu_table *tbl)
 }
 EXPORT_SYMBOL_GPL(iommu_release_ownership);
 
-static int iommu_add_device(struct device *dev)
+int iommu_add_device(struct device *dev)
 {
struct iommu_table *tbl;
int ret = 0;
@@ -1134,46 +1134,13 @@ static int iommu_add_device(struct device *dev)
 
return ret;
 }
+EXPORT_SYMBOL_GPL(iommu_add_device);
 
-static void iommu_del_device(struct device *dev)
+void iommu_del_device(struct device *dev)
 {
iommu_group_remove_device(dev);
 }
-
-static int iommu_bus_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
-   struct device *dev = data;
-
-   switch (action) {
-   case BUS_NOTIFY_ADD_DEVICE:
-   return iommu_add_device(dev);
-   case BUS_NOTIFY_DEL_DEVICE:
-   iommu_del_device(dev);
-   return 0;
-   default:
-   return 0;
-   }
-}
-
-static struct notifier_block tce_iommu_bus_nb = {
-   .notifier_call = iommu_bus_notifier,
-};
-
-static int __init tce_iommu_init(void)
-{
-   struct pci_dev *pdev = NULL;
-
-   BUILD_BUG_ON(PAGE_SIZE  IOMMU_PAGE_SIZE);
-
-   for_each_pci_dev(pdev)
-   iommu_add_device(pdev-dev);
-
-   bus_register_notifier(pci_bus_type, tce_iommu_bus_nb);
-   return 0;
-}
-
-subsys_initcall_sync(tce_iommu_init);
+EXPORT_SYMBOL_GPL(iommu_del_device);
 

Re: [PATCH 8/8] powerpc: Fix endian issues in crash dump code

2013-12-17 Thread Michael Ellerman
On Thu, 2013-12-12 at 15:59 +1100, Anton Blanchard wrote:
 A couple more device tree properties that need byte swapping.
 
 Signed-off-by: Anton Blanchard an...@samba.org
 ---
  arch/powerpc/kernel/crash_dump.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)
 
 diff --git a/arch/powerpc/kernel/crash_dump.c 
 b/arch/powerpc/kernel/crash_dump.c
 index 779a78c..11c1d06 100644
 --- a/arch/powerpc/kernel/crash_dump.c
 +++ b/arch/powerpc/kernel/crash_dump.c
 @@ -124,15 +124,15 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
  void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
  {
   unsigned long addr;
 - const u32 *basep, *sizep;
 + const __be32 *basep, *sizep;
   unsigned int rtas_start = 0, rtas_end = 0;
  
   basep = of_get_property(rtas.dev, linux,rtas-base, NULL);
   sizep = of_get_property(rtas.dev, rtas-size, NULL);
  
   if (basep  sizep) {
 - rtas_start = *basep;
 - rtas_end = *basep + *sizep;
 + rtas_start = be32_to_cpup(basep);
 + rtas_end = rtas_start + be32_to_cpup(sizep);
   }
  
   for (addr = begin; addr  end; addr += PAGE_SIZE) {

Not my favourite colour :D  What about this instead?

We could also add of_property_read_u32(), with an implied index of zero?

I don't like the rc handling, but couldn't come up with anything I liked
better.

cheers


diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 11c1d06..60fd0aa 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -124,15 +124,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
 {
unsigned long addr;
-   const __be32 *basep, *sizep;
+   u32 base, size;
unsigned int rtas_start = 0, rtas_end = 0;
+   int rc;
 
-   basep = of_get_property(rtas.dev, linux,rtas-base, NULL);
-   sizep = of_get_property(rtas.dev, rtas-size, NULL);
+   rc  = of_property_read_u32_index(rtas.dev, linux,rtas-base, 0, base);
+   rc |= of_property_read_u32_index(rtas.dev, rtas-size, 0, size);
 
-   if (basep  sizep) {
-   rtas_start = be32_to_cpup(basep);
-   rtas_end = rtas_start + be32_to_cpup(sizep);
+   if (rc == 0) {
+   rtas_start = base;
+   rtas_end = rtas_start + size;
}
 
for (addr = begin; addr  end; addr += PAGE_SIZE) {






___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2] powerpc/powernv: Framework to log critical errors on powernv.

2013-12-17 Thread Deepthi Dharwar
Hi Micheal,

Thanks for the review.

On 12/18/2013 08:13 AM, Michael Ellerman wrote:
 On Mon, 2013-12-16 at 18:00 +0530, Deepthi Dharwar wrote:
 This patch provides error logging interfaces to report critical
 powernv error to FSP.
 All the required information to dump the error is collected
 at POWERNV level through error log interfaces
 and then pushed on to FSP.

 This also supports synchronous error logging in cases of
 PANIC, by passing OPAL_ERROR_PANIC as severity in
 elog_create() call.
 
 Please make note of the fact that none of this code is currently used but will
 be in a subsequent patch. When can we expect those patches?

This patch only adds the framework to log errors. Coming days this
framework will be used to report all POWERNV errors in a phased manner.
We would ideally be targeting one sub-system at a time and use these
interfaces.

 
 diff --git a/arch/powerpc/include/asm/opal.h 
 b/arch/powerpc/include/asm/opal.h
 index 0f01308..1c5440a 100644
 --- a/arch/powerpc/include/asm/opal.h
 +++ b/arch/powerpc/include/asm/opal.h
 @@ -168,6 +168,7 @@ extern int opal_enter_rtas(struct rtas_args *args,
  #define OPAL_GET_MSG85
  #define OPAL_CHECK_ASYNC_COMPLETION 86
  #define OPAL_SYNC_HOST_REBOOT   87
 +#define OPAL_ELOG_SEND  88
  
  #ifndef __ASSEMBLY__
  
 @@ -260,6 +261,122 @@ enum OpalMessageType {
  OPAL_MSG_TYPE_MAX,
  };
  
 +/* Classification of Error/Events reporting type classification
 
 Standard comment style for block comments is:
 
 /*
  * Classification ...
  */
 
 That applies to almost all of your comments in here.
 
 
 + * Platform Events/Errors: Report Machine Check Interrupt
 
 I think these comments would be better inline with the values, eg:
 
   /* Report Machine Check Interrupt */
   OPAL_PLATFORM,
 
   /* Report all I/O related events/errors */
   OPAL_INPUT_OUTPUT,
 
   etc.
 
 
 Again that applies to most of your comments.

Sure, I'll make it inline.

 + * INPUT_OUTPUT: Report all I/O related events/errors
 + * RESOURCE_DEALLOC: Hotplug events and errors
 + * MISC: Miscellanous error
 + * Field: error_events_type
 
 What is this Field: thing about?

Field is just to add some readability that these options relate to
corresponding elog_create argument field.
Looks like the purpose is not getting solved.

 + */
 +enum error_events {
 
 If you're going to define an enum you should actually use it in the API, I
 can't see anywhere you do?
 
 If you do want to use an enum it should be opal_error_events.

Agree.

 +OPAL_PLATFORM,
 +OPAL_INPUT_OUTPUT,
 +OPAL_RESOURCE_DEALLOC,
 +OPAL_MISC,
 +};
 +/* OPAL Subsystem IDs listed for reporting events/errors
 + * Field: subsystem_id
 + */
 +
 +#define OPAL_PROCESSOR_SUBSYSTEM0x10
 +#define OPAL_MEMORY_SUBSYSTEM   0x20
 +#define OPAL_IO_SUBSYSTEM   0x30
 +#define OPAL_IO_DEVICES 0x40
 +#define OPAL_CEC_HARDWARE   0x50
 +#define OPAL_POWER_COOLING  0x60
 +#define OPAL_MISC_SUBSYSTEM 0x70
 +#define OPAL_SURVEILLANCE_ERR   0x7A
 +#define OPAL_PLATFORM_FIRMWARE  0x80
 +#define OPAL_SOFTWARE   0x90
 +#define OPAL_EXTERNAL_ENV   0xA0
 +
 +/* During reporting an event/error the following represents
 + * how serious the logged event/error is. (Severity)
 + * Field: event_sev
 + */
 +#define OPAL_INFO   0x00
 +#define OPAL_RECOVERED_ERR_GENERAL  0x10
 +
 +/* 0x2X series is to denote set of Predictive Error
 + * 0x20 Generic predictive error
 + * 0x21 Predictive error, degraded performance
 + * 0x22 Predictive error, fault may be corrected after reboot
 + * 0x23 Predictive error, fault may be corrected after reboot,
 + * degraded performance
 + * 0x24 Predictive error, loss of redundancy
 + */
 +#define OPAL_PREDICTIVE_ERR_GENERAL 0x20
 +#define OPAL_PREDICTIVE_ERR_DEGRADED_PERF   0x21
 +#define OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT0x22
 +#define OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_BOOT_DEGRADE_PERF 0x23
 +#define OPAL_PREDICTIVE_ERR_LOSS_OF_REDUNDANCY  0x24
 +
 +/* 0x4X series for Unrecoverable Error
 + * 0x40 Generic Unrecoverable error
 + * 0x41 Unrecoverable error bypassed with degraded performance
 + * 0x44 Unrecoverable error bypassed with loss of redundancy
 + * 0x45 Unrecoverable error bypassed with loss of redundancy and performance
 + * 0x48 Unrecoverable error bypassed with loss of function
 + */
 +#define OPAL_UNRECOVERABLE_ERR_GENERAL  0x40
 +#define OPAL_UNRECOVERABLE_ERR_DEGRADE_PERF 0x41
 +#define OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY  0x44
 +#define OPAL_UNRECOVERABLE_ERR_LOSS_REDUNDANCY_PERF 0x45
 +#define OPAL_UNRECOVERABLE_ERR_LOSS_OF_FUNCTION 0x48
 +#define OPAL_ERROR_PANIC

Re: [PATCH v2] powerpc/powernv: Framework to log critical errors on powernv.

2013-12-17 Thread Michael Ellerman
On Wed, 2013-12-18 at 10:48 +0530, Deepthi Dharwar wrote:
 Hi Micheal,
 
 Thanks for the review.

No worries.
 
 On 12/18/2013 08:13 AM, Michael Ellerman wrote:
  On Mon, 2013-12-16 at 18:00 +0530, Deepthi Dharwar wrote:
  +/* All the information regarding an error/event to be reported
  + * needs to populate this structure using pre-defined interfaces
  + * only
  + */
  +struct opal_errorlog {
  +
  +  uint16_t component_id;
  +  uint8_t error_events_type:3;
  
  Bit field?
  
  +  uint8_t subsystem_id;
  +
  +  uint8_t event_sev;
  +  uint8_t event_subtype;
  +  uint8_t usr_scn_count; /* User section count */
  
  user_section_count;
  
  +  uint8_t elog_origin;
  +
  +  uint32_t usr_scn_size; /* User section size */
  
  user_section_size;
  
  +  uint32_t reason_code;
  +  uint32_t additional_info[4];
  +
  +  char usr_data_dump[OPAL_LOG_MAX_DUMP];
  +};
  
  It looks like this goes straight to Opal, should we be using __packed ?
 
 Yes, this goes straight into Opal. The structure is defined such that
 it is packed by default, this will not require compiler to pack bytes.

Sure, but the compiler might decide to lay it out differently for some reason.
You should use __packed.

Also bitfields are essentially a big implementation defined behaviour sign,
so I would avoid using the bitfield.

  diff --git a/arch/powerpc/platforms/powernv/opal-elog.c 
  b/arch/powerpc/platforms/powernv/opal-elog.c
  index 58849d0..ade1e58 100644
  --- a/arch/powerpc/platforms/powernv/opal-elog.c
  +++ b/arch/powerpc/platforms/powernv/opal-elog.c
  @@ -22,7 +23,9 @@
   /* Maximum size of a single log on FSP is 16KB */
   #define OPAL_MAX_ERRLOG_SIZE  16384
   
  -/* maximu number of records powernv can hold */
  +#define USR_CHAR_ARRAY_FIXED_SIZE  4
  
  What is this?
 
 struct User data section is mapped to a buffer. As all the structures
 are padded, we need to subtract the same to do data manipulation.
 Make me need to re-word it or use __packed here.

Yeah that's still not really clear to me, so if you can do something that is
more obvious that would be good.

  @@ -272,6 +275,61 @@ static int init_err_log_buffer(void)
 return 0;
   }
   
  +/* Interface to be used by POWERNV to push the logs to FSP via Sapphire */
  +struct opal_errorlog *elog_create(uint8_t err_evt_type, uint16_t 
  component_id,
  +  uint8_t subsystem_id, uint8_t event_sev, uint8_t  event_subtype,
  +  uint32_t reason_code, uint32_t info0, uint32_t info1,
  +  uint32_t info2, uint32_t info3)
  
  
  A call to this function is going to be just a giant list of integer values, 
  it
  will not be easy to see at a glance which value goes in which field.
  
  I think you'd be better off with an elog_alloc() routine, and then you just 
  do
  the initialisation explicitly so that it's obvious which value goes where:
  
  elog-error_events_type = FOO;
  elog-component_id = BAR;
  elog-subsystem_id = ETC;
  
 
 elog_create() will be called by all sub-systems on POWERNV platform to
 log events and errors. I feel we are better off passing all the required
 arguments to the interface than initialize explicitly.
 This would have a cleaner interface to error logging by
 1) Removing huge amount of code duplication ( Each and every error/event
 to be reported needs to initialise fields of the opal_errorlog struct
 done many many times on POWERNV, results in redundant code )

It will be more lines of code, but it might be more readable code.

 2) There are chances of missing out on initialising key fields if
 done by the user. Having an interface mandates the fields that
 needs to populated while logging error/events.

I can always pass 0 :)

We will see how it looks once there are some callers.

  +void commit_errorlog_to_fsp(struct opal_errorlog *buf)
  +{
  +  opal_commit_log_to_fsp((void *)(vmalloc_to_pfn(buf)  PAGE_SHIFT));
  
  Can't fail?
 
 It is better to have a return here, atleast for the caller to know if
 opal handling of the same is successful or not. I will make the required
 change.
 
  +  kfree(buf);
  
  It's a bit rude to free buf when the caller still has a pointer to it.
 
 Technically, after the error log has been committed, the user is not
 supposed to re-use or do anything with that buffer. I need to add
 checks in all my routines if(buf != NULL), to handle the case where
 the user by mistake is trying to use the same buffer pointer.

Why is the user not supposed to re-use it?

kfree()'ing the buffer doesn't prevent the caller from re-using it.

cheers


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH v2] powerpc/powernv: Framework to log critical errors on powernv.

2013-12-17 Thread Deepthi Dharwar
On 12/18/2013 10:57 AM, Michael Ellerman wrote:
 On Wed, 2013-12-18 at 10:48 +0530, Deepthi Dharwar wrote:
 Hi Micheal,

 Thanks for the review.
 
 No worries.
 
 On 12/18/2013 08:13 AM, Michael Ellerman wrote:
 On Mon, 2013-12-16 at 18:00 +0530, Deepthi Dharwar wrote:
 +/* All the information regarding an error/event to be reported
 + * needs to populate this structure using pre-defined interfaces
 + * only
 + */
 +struct opal_errorlog {
 +
 +  uint16_t component_id;
 +  uint8_t error_events_type:3;

 Bit field?

 +  uint8_t subsystem_id;
 +
 +  uint8_t event_sev;
 +  uint8_t event_subtype;
 +  uint8_t usr_scn_count; /* User section count */

 user_section_count;

 +  uint8_t elog_origin;
 +
 +  uint32_t usr_scn_size; /* User section size */

 user_section_size;

 +  uint32_t reason_code;
 +  uint32_t additional_info[4];
 +
 +  char usr_data_dump[OPAL_LOG_MAX_DUMP];
 +};

 It looks like this goes straight to Opal, should we be using __packed ?

 Yes, this goes straight into Opal. The structure is defined such that
 it is packed by default, this will not require compiler to pack bytes.
 
 Sure, but the compiler might decide to lay it out differently for some reason.
 You should use __packed.

Ok.

 Also bitfields are essentially a big implementation defined behaviour sign,
 so I would avoid using the bitfield.

Yes, bitfields will be gone.

 diff --git a/arch/powerpc/platforms/powernv/opal-elog.c 
 b/arch/powerpc/platforms/powernv/opal-elog.c
 index 58849d0..ade1e58 100644
 --- a/arch/powerpc/platforms/powernv/opal-elog.c
 +++ b/arch/powerpc/platforms/powernv/opal-elog.c
 @@ -22,7 +23,9 @@
  /* Maximum size of a single log on FSP is 16KB */
  #define OPAL_MAX_ERRLOG_SIZE  16384
  
 -/* maximu number of records powernv can hold */
 +#define USR_CHAR_ARRAY_FIXED_SIZE  4

 What is this?

 struct User data section is mapped to a buffer. As all the structures
 are padded, we need to subtract the same to do data manipulation.
 Make me need to re-word it or use __packed here.
 
 Yeah that's still not really clear to me, so if you can do something that is
 more obvious that would be good.

Sure.

 @@ -272,6 +275,61 @@ static int init_err_log_buffer(void)
return 0;
  }
  
 +/* Interface to be used by POWERNV to push the logs to FSP via Sapphire */
 +struct opal_errorlog *elog_create(uint8_t err_evt_type, uint16_t 
 component_id,
 +  uint8_t subsystem_id, uint8_t event_sev, uint8_t  event_subtype,
 +  uint32_t reason_code, uint32_t info0, uint32_t info1,
 +  uint32_t info2, uint32_t info3)


 A call to this function is going to be just a giant list of integer values, 
 it
 will not be easy to see at a glance which value goes in which field.

 I think you'd be better off with an elog_alloc() routine, and then you just 
 do
 the initialisation explicitly so that it's obvious which value goes where:

 elog-error_events_type = FOO;
 elog-component_id = BAR;
 elog-subsystem_id = ETC;


 elog_create() will be called by all sub-systems on POWERNV platform to
 log events and errors. I feel we are better off passing all the required
 arguments to the interface than initialize explicitly.
 This would have a cleaner interface to error logging by
 1) Removing huge amount of code duplication ( Each and every error/event
 to be reported needs to initialise fields of the opal_errorlog struct
 done many many times on POWERNV, results in redundant code )
 
 It will be more lines of code, but it might be more readable code.
 
 2) There are chances of missing out on initialising key fields if
 done by the user. Having an interface mandates the fields that
 needs to populated while logging error/events.
 
 I can always pass 0 :)

I was referring to more on the lines of missing unintentionally :)

 We will see how it looks once there are some callers.

Sure. I will retain it for now. Going forward once we start adding
users exploiting this interface, we can then take a call.
 
 +void commit_errorlog_to_fsp(struct opal_errorlog *buf)
 +{
 +  opal_commit_log_to_fsp((void *)(vmalloc_to_pfn(buf)  PAGE_SHIFT));

 Can't fail?

 It is better to have a return here, atleast for the caller to know if
 opal handling of the same is successful or not. I will make the required
 change.

 +  kfree(buf);

 It's a bit rude to free buf when the caller still has a pointer to it.

 Technically, after the error log has been committed, the user is not
 supposed to re-use or do anything with that buffer. I need to add
 checks in all my routines if(buf != NULL), to handle the case where
 the user by mistake is trying to use the same buffer pointer.
 
 Why is the user not supposed to re-use it?
 
 kfree()'ing the buffer doesn't prevent the caller from re-using it.

Release the memory. Assign pointer to NULL before returning.
All the error logging interfaces should have NULL check (to return).
User can't do much in that case.

Regards,
Deepthi

 cheers
 
 
 
 

___

Re: [PATCH] powerpc: book3s: kvm: Don't abuse host r2 in exit path

2013-12-17 Thread Aneesh Kumar K.V

Hi Alex,

Any update on this ? We need this to got into 3.13.

-aneesh 

Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com writes:

 From: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com

 We don't use PACATOC for PR. Avoid updating HOST_R2 with PR
 KVM mode when both HV and PR are enabled in the kernel. Without this we
 get the below crash

 (qemu)
 Unable to handle kernel paging request for data at address 0x8310
 Faulting instruction address: 0xc001d5a4
 cpu 0x2: Vector: 300 (Data Access) at [c001dc53aef0]
 pc: c001d5a4: .vtime_delta.isra.1+0x34/0x1d0
 lr: c001d760: .vtime_account_system+0x20/0x60
 sp: c001dc53b170
msr: 80009032
dar: 8310
  dsisr: 4000
   current = 0xc001d76c62d0
   paca= 0xcfef1100   softe: 0irq_happened: 0x01
 pid   = 4472, comm = qemu-system-ppc
 enter ? for help
 [c001dc53b200] c001d760 .vtime_account_system+0x20/0x60
 [c001dc53b290] c008d050 .kvmppc_handle_exit_pr+0x60/0xa50
 [c001dc53b340] c008f51c kvm_start_lightweight+0xb4/0xc4
 [c001dc53b510] c008cdf0 .kvmppc_vcpu_run_pr+0x150/0x2e0
 [c001dc53b9e0] c008341c .kvmppc_vcpu_run+0x2c/0x40
 [c001dc53ba50] c0080af4 .kvm_arch_vcpu_ioctl_run+0x54/0x1b0
 [c001dc53bae0] c007b4c8 .kvm_vcpu_ioctl+0x478/0x730
 [c001dc53bca0] c02140cc .do_vfs_ioctl+0x4ac/0x770
 [c001dc53bd80] c02143e8 .SyS_ioctl+0x58/0xb0
 [c001dc53be30] c0009e58 syscall_exit+0x0/0x98
 --- Exception: c00 (System Call) at 1f960160
 SP (1ecbe3c0) is in userspace

 These changes were originally part of
 http://mid.gmane.org/20130806042205.gr19...@iris.ozlabs.ibm.com

 Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
 ---
  arch/powerpc/include/asm/kvm_book3s_asm.h | 1 +
  arch/powerpc/kernel/asm-offsets.c | 1 +
  arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 7 +++
  3 files changed, 5 insertions(+), 4 deletions(-)

 diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h 
 b/arch/powerpc/include/asm/kvm_book3s_asm.h
 index 0bd9348..69fe837 100644
 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h
 +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
 @@ -79,6 +79,7 @@ struct kvmppc_host_state {
   ulong vmhandler;
   ulong scratch0;
   ulong scratch1;
 + ulong scratch2;
   u8 in_guest;
   u8 restore_hid5;
   u8 napping;
 diff --git a/arch/powerpc/kernel/asm-offsets.c 
 b/arch/powerpc/kernel/asm-offsets.c
 index 8e6ede6..841a4c8 100644
 --- a/arch/powerpc/kernel/asm-offsets.c
 +++ b/arch/powerpc/kernel/asm-offsets.c
 @@ -583,6 +583,7 @@ int main(void)
   HSTATE_FIELD(HSTATE_VMHANDLER, vmhandler);
   HSTATE_FIELD(HSTATE_SCRATCH0, scratch0);
   HSTATE_FIELD(HSTATE_SCRATCH1, scratch1);
 + HSTATE_FIELD(HSTATE_SCRATCH2, scratch2);
   HSTATE_FIELD(HSTATE_IN_GUEST, in_guest);
   HSTATE_FIELD(HSTATE_RESTORE_HID5, restore_hid5);
   HSTATE_FIELD(HSTATE_NAPPING, napping);
 diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
 b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
 index 339aa5e..16f7654 100644
 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
 +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
 @@ -750,15 +750,14 @@ kvmppc_interrupt_hv:
* guest CR, R12 saved in shadow VCPU SCRATCH1/0
* guest R13 saved in SPRN_SCRATCH0
*/
 - /* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
 - std r9, HSTATE_HOST_R2(r13)
 + std r9, HSTATE_SCRATCH2(r13)
  
   lbz r9, HSTATE_IN_GUEST(r13)
   cmpwi   r9, KVM_GUEST_MODE_HOST_HV
   beq kvmppc_bad_host_intr
  #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
   cmpwi   r9, KVM_GUEST_MODE_GUEST
 - ld  r9, HSTATE_HOST_R2(r13)
 + ld  r9, HSTATE_SCRATCH2(r13)
   beq kvmppc_interrupt_pr
  #endif
   /* We're now back in the host but in guest MMU context */
 @@ -778,7 +777,7 @@ kvmppc_interrupt_hv:
   std r6, VCPU_GPR(R6)(r9)
   std r7, VCPU_GPR(R7)(r9)
   std r8, VCPU_GPR(R8)(r9)
 - ld  r0, HSTATE_HOST_R2(r13)
 + ld  r0, HSTATE_SCRATCH2(r13)
   std r0, VCPU_GPR(R9)(r9)
   std r10, VCPU_GPR(R10)(r9)
   std r11, VCPU_GPR(R11)(r9)
 -- 
 1.8.3.2

 --
 To unsubscribe from this list: send the line unsubscribe kvm-ppc in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev