[PATCH 4/5] powerpc: Move flush_all_to_thread() below save_sprs()

2016-03-22 Thread Cyril Bur
Signed-off-by: Cyril Bur 
---
 arch/powerpc/kernel/process.c | 34 +-
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 56444a6..7625976 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -511,23 +511,6 @@ void save_all(struct task_struct *tsk)
msr_check_and_clear(msr_all_available);
 }
 
-void flush_all_to_thread(struct task_struct *tsk)
-{
-   if (tsk->thread.regs) {
-   preempt_disable();
-   BUG_ON(tsk != current);
-   save_all(tsk);
-
-#ifdef CONFIG_SPE
-   if (tsk->thread.regs->msr & MSR_SPE)
-   tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
-#endif
-
-   preempt_enable();
-   }
-}
-EXPORT_SYMBOL(flush_all_to_thread);
-
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 void do_send_trap(struct pt_regs *regs, unsigned long address,
  unsigned long error_code, int signal_code, int breakpt)
@@ -1047,6 +1030,23 @@ static inline void restore_sprs(struct thread_struct 
*old_thread,
 #endif
 }
 
+void flush_all_to_thread(struct task_struct *tsk)
+{
+   if (tsk->thread.regs) {
+   preempt_disable();
+   BUG_ON(tsk != current);
+   save_all(tsk);
+
+#ifdef CONFIG_SPE
+   if (tsk->thread.regs->msr & MSR_SPE)
+   tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
+#endif
+
+   preempt_enable();
+   }
+}
+EXPORT_SYMBOL(flush_all_to_thread);
+
 struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
 {
-- 
2.7.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 5/5] powerpc: Preserve the SPR values across fork() syscalls

2016-03-22 Thread Cyril Bur
Currently copy_thread() doesn't flush SPRs to the parent thread struct.
Currently this only affects the TAR register as perf takes care of some of the
others and the remaining ones are all Event Based Branch (EBB) registers which
are cleared across fork().

Signed-off-by: Cyril Bur 
---
 arch/powerpc/kernel/process.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7625976..892c76d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1037,6 +1037,8 @@ void flush_all_to_thread(struct task_struct *tsk)
BUG_ON(tsk != current);
save_all(tsk);
 
+   save_sprs(&tsk->thread);
+
 #ifdef CONFIG_SPE
if (tsk->thread.regs->msr & MSR_SPE)
tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
-- 
2.7.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/5] powerpc: Fix TAR leak across exec() syscalls

2016-03-22 Thread Cyril Bur
Currently start_thread() doesn't sanitise TAR.

The TAR SPR register is a register that can be set and branched to, not
sanitising it presents an information leak to the new executable.

Other SPR registers such as the Performance registers used by perf (and are
managed entirely by perf) as well as the Event Based Branch (EBB) registers are
left alone by design as these fall into the same category as leaving file
descriptors open across exec(), it is up to the parent thread to sanitise what it
deems necessary.

Signed-off-by: Cyril Bur 
---
 arch/powerpc/kernel/process.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index d7a9df5..56444a6 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1577,6 +1577,8 @@ void start_thread(struct pt_regs *regs, unsigned long 
start, unsigned long sp)
current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */
current->thread.vr_save_area = NULL;
current->thread.vrsave = 0;
+   if (cpu_has_feature(CPU_FTR_ALTIVEC))
+   mtspr(SPRN_VRSAVE, 0);
current->thread.used_vr = 0;
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_SPE
@@ -1592,6 +1594,18 @@ void start_thread(struct pt_regs *regs, unsigned long 
start, unsigned long sp)
current->thread.tm_texasr = 0;
current->thread.tm_tfiar = 0;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#ifdef CONFIG_PPC_BOOK3S_64
+   /*
+* Zero out the SPRs.
+* Don't touch the ones used by perf, it controls them.
+* Don't touch the EBB regs. This falls into the same category of
+*   responsibility as open file descriptors across exec(), the parent should
+*   sanitise if it feels it would be a problem
+*/
+   current->thread.tar = 0;
+   if (cpu_has_feature(CPU_FTR_ARCH_206))
+   mtspr(SPRN_TAR, 0);
+#endif /* CONFIG_PPC_BOOK3S_64 */
 }
 EXPORT_SYMBOL(start_thread);
 
-- 
2.7.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/5] selftests/powerpc: Add exec() test to check for spr sanitisation

2016-03-22 Thread Cyril Bur
Signed-off-by: Cyril Bur 
---
 tools/testing/selftests/powerpc/syscalls/Makefile  |  3 +-
 .../testing/selftests/powerpc/syscalls/spr_exec.c  | 78 ++
 2 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/syscalls/spr_exec.c

diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile 
b/tools/testing/selftests/powerpc/syscalls/Makefile
index b35c794..291ba45 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -1,10 +1,11 @@
-TEST_PROGS := ipc_unmuxed
+TEST_PROGS := ipc_unmuxed spr_exec
 
 CFLAGS += -I../../../../../usr/include
 
 all: $(TEST_PROGS)
 
 $(TEST_PROGS): ../harness.c
+spr_exec: ../utils.c
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/syscalls/spr_exec.c 
b/tools/testing/selftests/powerpc/syscalls/spr_exec.c
new file mode 100644
index 000..5ecd6ca
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/spr_exec.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test checks that the TAR (an SPR) is correctly sanitised across
+ * execve()
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+static char *name;
+static int count;
+
+static int exec_spr(void)
+{
+   unsigned long tar;
+   char buffer[10];
+   char *args[3];
+
+   asm __volatile__(
+   "mfspr %[tar], 815"
+   : [tar] "=r" (tar)
+   );
+   /* Read TAR */
+   FAIL_IF(tar != 0);
+
+   tar = 1;
+   asm __volatile__(
+   "mtspr 815, %[tar]"
+   :
+   : [tar] "r" (tar)
+   );
+
+   FAIL_IF(sprintf(buffer, "%d", count + 1) == -1);
+   args[0] = name;
+   args[1] = buffer;
+   args[2] = NULL;
+   FAIL_IF(execve(name, args, NULL) == -1);
+
+   return 0;
+}
+
+static int exec_spr_check(void)
+{
+   unsigned long tar;
+
+   asm __volatile__(
+   "mfspr %[tar], 815;"
+   : [tar] "=r" (tar)
+   );
+   /* Read TAR */
+   FAIL_IF(tar != 0);
+
+   return 0;
+}
+
+int main(int argc, char *argv[])
+{
+   SKIP_IF(!have_hwcap2(PPC_FEATURE2_TAR));
+   name = argv[0];
+   /* Do this a few times to be sure it isn't a false negative */
+   if (argc == 1 || atoi(argv[1]) < 10) {
+   if (argc > 1)
+   count = atoi(argv[1]);
+   return test_harness(exec_spr, "spr_exec");
+   } else {
+   return test_harness(exec_spr_check, "spr_exec_check");
+   }
+}
-- 
2.7.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/5] selftests/powerpc: Add fork() test to check for spr being preserved

2016-03-22 Thread Cyril Bur
Signed-off-by: Cyril Bur 
---
 tools/testing/selftests/powerpc/syscalls/Makefile  |  3 +-
 .../testing/selftests/powerpc/syscalls/spr_fork.c  | 78 ++
 2 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/powerpc/syscalls/spr_fork.c

diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile 
b/tools/testing/selftests/powerpc/syscalls/Makefile
index 291ba45..55969f3 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -1,4 +1,4 @@
-TEST_PROGS := ipc_unmuxed spr_exec
+TEST_PROGS := ipc_unmuxed spr_exec spr_fork
 
 CFLAGS += -I../../../../../usr/include
 
@@ -6,6 +6,7 @@ all: $(TEST_PROGS)
 
 $(TEST_PROGS): ../harness.c
 spr_exec: ../utils.c
+spr_fork: ../utils.c
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/syscalls/spr_fork.c 
b/tools/testing/selftests/powerpc/syscalls/spr_fork.c
new file mode 100644
index 000..1a351a6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/spr_fork.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test checks that the TAR register (an SPR) is correctly preserved
+ * across a fork()
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+static int fork_spr(void)
+{
+   int child_ret;
+   unsigned long tar;
+   pid_t pid;
+   int i;
+
+
+   /* Do it a few times as there is a chance that one might luckily pass */
+   i = 0;
+   while (i < 10) {
+   /* What are the odds... */
+   tar = 0x123456;
+   asm __volatile__(
+   "mtspr 815, %[tar]"
+   :
+   : [tar] "r" (tar)
+   );
+
+   pid = fork();
+   FAIL_IF(pid == -1);
+   asm __volatile__(
+   "mfspr %[tar], 815"
+   : [tar] "=r" (tar)
+   );
+
+   FAIL_IF(tar != 0x123456);
+
+   if (pid == 0)
+   exit(0);
+
+   FAIL_IF(waitpid(pid, &child_ret, 0) == -1);
+
+   /* Child hadn't exited? */
+   FAIL_IF(!WIFEXITED(child_ret));
+
+   /* Child detected a bad tar */
+   FAIL_IF(WEXITSTATUS(child_ret));
+
+   /* Reset it */
+   tar = 0;
+   asm __volatile__(
+   "mtspr 815, %[tar]"
+   :
+   : [tar] "r" (tar)
+   );
+
+   i++;
+   }
+
+   return 0;
+}
+
+int main(int argc, char *argv[])
+{
+   SKIP_IF(!have_hwcap2(PPC_FEATURE2_TAR));
+   return test_harness(fork_spr, "spr_fork");
+}
-- 
2.7.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] tty/hvc: Use IRQF_SHARED for hvc consoles

2016-03-22 Thread Stewart Smith
Samuel Mendoza-Jonas  writes:
> Commit 2def86a7200c
> ("hvc: Convert to using interrupts instead of opal events")
> enabled the use of interrupts in the hvc_driver for OPAL platforms.
> However on machines with more than one hvc console, any console after
> the first will fail to register an interrupt handler in
> notifier_add_irq() since all consoles share the same IRQ number but do
> not set the IRQF_SHARED flag:
>
> [   51.179907] genirq: Flags mismatch irq 31.  (hvc_console) vs.
>  (hvc_console)
> [   51.180010] hvc_open: request_irq failed with rc -16.
>
> This error propagates up to hvc_open() and the console is closed, but
> OPAL will still generate interrupts that are not handled, leading to
> rcu_sched stall warnings.
>
> Set IRQF_SHARED when calling request_irq, allowing additional consoles
> to start properly.
>
> Signed-off-by: Samuel Mendoza-Jonas 
> Cc:  # 4.1.x-

Tested on 4.4.6 - seemed to stop (some of) the problems I was having
when using it as a kernel for the bootloader on a FSP based POWER8
system.

Tested-by: Stewart Smith 

-- 
Stewart Smith
OPAL Architect, IBM.

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2/2] powerpc/pagetable: Add option to dump kernel hashpagetable

2016-03-22 Thread kbuild test robot
Hi Rashmica,

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.5 next-20160322]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improving the system]

url:
https://github.com/0day-ci/linux/commits/Rashmica-Gupta/powerpc-pagetable-Add-option-to-dump-the-linux-pagetables/20160322-060934
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allyesconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

   arch/powerpc/mm/dump_hashpagetable.c: In function 'walk_pte':
>> arch/powerpc/mm/dump_hashpagetable.c:363:18: error: '_PAGE_COMBO' undeclared 
>> (first use in this function)
  if (((pteval & _PAGE_COMBO) == _PAGE_COMBO) ||
 ^
   arch/powerpc/mm/dump_hashpagetable.c:363:18: note: each undeclared 
identifier is reported only once for each function it appears in

vim +/_PAGE_COMBO +363 arch/powerpc/mm/dump_hashpagetable.c

   357  if (addr < VMALLOC_END)
   358  psize = mmu_vmalloc_psize;
   359  else
   360  psize = mmu_io_psize;
   361  
   362  /* check for secret 4K mappings */
 > 363  if (((pteval & _PAGE_COMBO) == _PAGE_COMBO) ||
   364  ((pteval & _PAGE_4K_PFN) == _PAGE_4K_PFN))
   365  psize = mmu_io_psize;
   366  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH kernel 9/9] KVM: PPC: VFIO device: support SPAPR TCE

2016-03-22 Thread David Gibson
On Tue, Mar 22, 2016 at 11:34:55AM +1100, Alexey Kardashevskiy wrote:
> Uff, lost cc: list. Added back. Some comments below.
> 
> 
> On 03/21/2016 04:19 PM, David Gibson wrote:
> >On Fri, Mar 18, 2016 at 11:12:26PM +1100, Alexey Kardashevskiy wrote:
> >>On March 15, 2016 17:29:26 David Gibson  wrote:
> >>
> >>>On Fri, Mar 11, 2016 at 10:09:50AM +1100, Alexey Kardashevskiy wrote:
> On 03/10/2016 04:21 PM, David Gibson wrote:
> >On Wed, Mar 09, 2016 at 08:20:12PM +1100, Alexey Kardashevskiy wrote:
> >>On 03/09/2016 04:45 PM, David Gibson wrote:
> >>>On Mon, Mar 07, 2016 at 02:41:17PM +1100, Alexey Kardashevskiy wrote:
> sPAPR TCE IOMMU is para-virtualized and the guest does map/unmap
> via hypercalls which take a logical bus id (LIOBN) as a target IOMMU
> identifier. LIOBNs are made up, advertised to guest systems and
> linked to IOMMU groups by the user space.
> In order to enable acceleration for IOMMU operations in KVM, we need
> to tell KVM the information about the LIOBN-to-group mapping.
> 
> For that, a new KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE_LIOBN parameter
> is added which accepts:
> - a VFIO group fd and IO base address to find the actual hardware
> TCE table;
> - a LIOBN to assign to the found table.
> 
> Before notifying KVM about new link, this check the group for being
> registered with KVM device in order to release them at unexpected KVM
> finish.
> 
> This advertises the new KVM_CAP_SPAPR_TCE_VFIO capability to the user
> space.
> 
> While we are here, this also fixes VFIO KVM device compiling to let it
> link to a KVM module.
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
>   Documentation/virtual/kvm/devices/vfio.txt |  21 +-
>   arch/powerpc/kvm/Kconfig   |   1 +
>   arch/powerpc/kvm/Makefile  |   5 +-
>   arch/powerpc/kvm/powerpc.c |   1 +
>   include/uapi/linux/kvm.h   |   9 +++
>   virt/kvm/vfio.c| 106
> +
>   6 files changed, 140 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/devices/vfio.txt
> b/Documentation/virtual/kvm/devices/vfio.txt
> index ef51740..c0d3eb7 100644
> --- a/Documentation/virtual/kvm/devices/vfio.txt
> +++ b/Documentation/virtual/kvm/devices/vfio.txt
> @@ -16,7 +16,24 @@ Groups:
> 
>   KVM_DEV_VFIO_GROUP attributes:
> KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device 
>  tracking
> + kvm_device_attr.addr points to an int32_t file descriptor
> + for the VFIO group.
> >>>
> >>>AFAICT these changes are accurate for VFIO as it is already, in which
> >>>case it might be clearer to put them in a separate patch.
> >>>
> KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device
> tracking
> + kvm_device_attr.addr points to an int32_t file descriptor
> + for the VFIO group.
> 
> -For each, kvm_device_attr.addr points to an int32_t file descriptor
> -for the VFIO group.
> +  KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE_LIOBN: sets a liobn for a VFIO 
> group
> + kvm_device_attr.addr points to a struct:
> + struct kvm_vfio_spapr_tce_liobn {
> + __u32   argsz;
> + __s32   fd;
> + __u32   liobn;
> + __u8pad[4];
> + __u64   start_addr;
> + };
> + where
> + @argsz is the size of kvm_vfio_spapr_tce_liobn;
> + @fd is a file descriptor for a VFIO group;
> + @liobn is a logical bus id to be associated with the 
> group;
> + @start_addr is a DMA window offset on the IO (PCI) bus
> >>>
> >>>For the cause of DDW and multiple windows, I'm assuming you can call
> >>>this multiple times with different LIOBNs and the same IOMMU group?
> >>
> >>
> >>Yes. It is called twice per each group (when DDW is activated) - for 
> >>32bit
> >>and 64bit windows, this is why @start_addr is there.
> >>
> >>
> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
> index 1059846..dfa3488 100644
> --- a/arch/powerpc/kvm/Kconfig
> +++ b/arch/powerpc/kvm/Kconfig
> @@ -65,6 +65,7 @@ config KVM_BOOK3S_64
>   select KVM
>   select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
>   select SPAPR_TCE_IOMMU if 

Re: pseries/eeh: Handle RTAS delay requests in configure_bridge

2016-03-22 Thread Russell Currey
On Wed, 2016-03-23 at 11:38 +1100, Michael Ellerman wrote:
> On Tue, 2016-22-03 at 00:34:55 UTC, Russell Currey wrote:
> > 
> > In the configure_pe and configure_bridge RTAS calls, the spec states
> > that values of 9900-9905 can be returned, indicating that software
> > should delay for 10^x (where x is the last digit, i.e. 990x)
> > milliseconds and attempt the call again. Currently, the kernel doesn't
> > know about this, and respecting it fixes some PCI failures when the
> > hypervisor is busy.
> > 
> > The delay is capped at 0.2 seconds.
> > 
> > Signed-off-by: Russell Currey 
> > ---
> >  arch/powerpc/platforms/pseries/eeh_pseries.c | 63
> > +---
> >  1 file changed, 47 insertions(+), 16 deletions(-)
> > 
> > diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c
> > b/arch/powerpc/platforms/pseries/eeh_pseries.c
> > index ac3ffd9..c5603185 100644
> > --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> > +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> > @@ -614,30 +614,61 @@ static int pseries_eeh_get_log(struct eeh_pe *pe,
> > int severity, char *drv_log, u
> >  static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
> >  {
> >     int config_addr;
> > -   int ret;
> > +   int ret = -1;
> If possible it's preferable not to pre-initialise your return value.
> 
> If you leave it uninitialised then the compiler can (hopefully) detect
> any
> paths where you fail to initialise it.
> 
Relic of a previous iteration, my bad.
> > 
> > +   /* Waiting 0.2s maximum before skipping configuration */
> > +   int max_wait = 200;
> > +   int mwait;
> >  
> >     /* Figure out the PE address */
> >     config_addr = pe->config_addr;
> >     if (pe->addr)
> >     config_addr = pe->addr;
> >  
> > -   /* Use new configure-pe function, if supported */
> > -   if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
> > -   ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> > -   config_addr, BUID_HI(pe->phb->buid),
> > -   BUID_LO(pe->phb->buid));
> > -   } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
> > -   ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
> > -   config_addr, BUID_HI(pe->phb->buid),
> > -   BUID_LO(pe->phb->buid));
> > -   } else {
> > -   return -EFAULT;
> > -   }
> > +   while (1) {
> > +   if (max_wait < 0)
> > +   goto err;
> Can't you just do:
> 
>   while (max_wait > 0) {
> 
> ?
...yes.
> 
> > 
> > +
> > +   /* Use new configure-pe function, if supported */
> > +   if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
> > +   ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> > +   config_addr, BUID_HI(pe->phb-
> > >buid),
> > +   BUID_LO(pe->phb->buid));
> > +   } else if (ibm_configure_bridge !=
> > RTAS_UNKNOWN_SERVICE) {
> > +   ret = rtas_call(ibm_configure_bridge, 3, 1,
> > NULL,
> > +   config_addr, BUID_HI(pe->phb-
> > >buid),
> > +   BUID_LO(pe->phb->buid));
> > +   } else {
> > +   return -EFAULT;
> > +   }
> I realise you've just indented that code, but it would be nice to clean
> it up
> as a precursor patch.
> 
> AFAICS the args are identical, so you could just put the token in a
> variable
> and do the rtas_call() once. It also looks like we check at startup that
> we
> have one of the tokens, so we could choose the appropriate token then and
> avoid
> any conditinals in this code.

Yes, good idea.
> 
> > 
> >  
> > -   if (ret)
> > -   pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x
> > (%d)\n",
> > -   __func__, pe->phb->global_number, pe->addr,
> > ret);
> > +   /*
> > +    * If RTAS returns a delay value, it expects software
> > to sleep
> > +    * for 10^x milliseconds.  The max value it can return
> > is thus
> > +    * 10^5 (RTAS_EXTENDED_DELAY_MAX), which is way too
> > long.
> > +    */
> >  
> > +   switch (ret) {
> > +   case 0:
> > +   return ret;
> > +   case RTAS_EXTENDED_DELAY_MIN:
> > +   mwait = 1;
> > +   break;
> > +   case RTAS_EXTENDED_DELAY_MAX:
> > +   mwait = 10;
> > +   break;
> > +   case RTAS_EXTENDED_DELAY_MIN+2:
> > +   mwait = 100;
> > +   break;
> > +   default:
> > +   goto err;
> > +   }
> > +
> > +   max_wait -= mwait;
> > +   msleep(mwait);
> Can you use rtas_busy_delay() ?

Wasn't aware of that, makes life a lot easier.  Do you know if the 0.2s
maximum delay also applies across the board?  I definitely want to enforce
it here, but if it is ubiquitous then it should be 

Re: pseries/eeh: Handle RTAS delay requests in configure_bridge

2016-03-22 Thread Michael Ellerman
On Tue, 2016-22-03 at 00:34:55 UTC, Russell Currey wrote:
> In the configure_pe and configure_bridge RTAS calls, the spec states
> that values of 9900-9905 can be returned, indicating that software
> should delay for 10^x (where x is the last digit, i.e. 990x)
> milliseconds and attempt the call again. Currently, the kernel doesn't
> know about this, and respecting it fixes some PCI failures when the
> hypervisor is busy.
> 
> The delay is capped at 0.2 seconds.
> 
> Signed-off-by: Russell Currey 
> ---
>  arch/powerpc/platforms/pseries/eeh_pseries.c | 63 
> +---
>  1 file changed, 47 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
> b/arch/powerpc/platforms/pseries/eeh_pseries.c
> index ac3ffd9..c5603185 100644
> --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> @@ -614,30 +614,61 @@ static int pseries_eeh_get_log(struct eeh_pe *pe, int 
> severity, char *drv_log, u
>  static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
>  {
>   int config_addr;
> - int ret;
> + int ret = -1;

If possible it's preferable not to pre-initialise your return value.

If you leave it uninitialised then the compiler can (hopefully) detect any
paths where you fail to initialise it.

> + /* Waiting 0.2s maximum before skipping configuration */
> + int max_wait = 200;
> + int mwait;
>  
>   /* Figure out the PE address */
>   config_addr = pe->config_addr;
>   if (pe->addr)
>   config_addr = pe->addr;
>  
> - /* Use new configure-pe function, if supported */
> - if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
> - ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> - config_addr, BUID_HI(pe->phb->buid),
> - BUID_LO(pe->phb->buid));
> - } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
> - ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
> - config_addr, BUID_HI(pe->phb->buid),
> - BUID_LO(pe->phb->buid));
> - } else {
> - return -EFAULT;
> - }
> + while (1) {
> + if (max_wait < 0)
> + goto err;

Can't you just do:

while (max_wait > 0) {

?

> +
> + /* Use new configure-pe function, if supported */
> + if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
> + ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> + config_addr, BUID_HI(pe->phb->buid),
> + BUID_LO(pe->phb->buid));
> + } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
> + ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
> + config_addr, BUID_HI(pe->phb->buid),
> + BUID_LO(pe->phb->buid));
> + } else {
> + return -EFAULT;
> + }

I realise you've just indented that code, but it would be nice to clean it up
as a precursor patch.

AFAICS the args are identical, so you could just put the token in a variable
and do the rtas_call() once. It also looks like we check at startup that we
have one of the tokens, so we could choose the appropriate token then and avoid
any conditionals in this code.

>  
> - if (ret)
> - pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
> - __func__, pe->phb->global_number, pe->addr, ret);
> + /*
> +  * If RTAS returns a delay value, it expects software to sleep
> +  * for 10^x milliseconds.  The max value it can return is thus
> +  * 10^5 (RTAS_EXTENDED_DELAY_MAX), which is way too long.
> +  */
>  
> + switch (ret) {
> + case 0:
> + return ret;
> + case RTAS_EXTENDED_DELAY_MIN:
> + mwait = 1;
> + break;
> + case RTAS_EXTENDED_DELAY_MAX:
> + mwait = 10;
> + break;
> + case RTAS_EXTENDED_DELAY_MIN+2:
> + mwait = 100;
> + break;
> + default:
> + goto err;
> + }
> +
> + max_wait -= mwait;
> + msleep(mwait);

Can you use rtas_busy_delay() ?

> + }
> + err:
> + pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
> + __func__, pe->phb->global_number, pe->addr, ret);
>   return ret;
>  }


cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] pseries/eeh: Handle RTAS delay requests in configure_bridge

2016-03-22 Thread Russell Currey
On Tue, 2016-03-22 at 11:34 +1100, Russell Currey wrote:
> In the configure_pe and configure_bridge RTAS calls, the spec states
> that values of 9900-9905 can be returned, indicating that software
> should delay for 10^x (where x is the last digit, i.e. 990x)
> milliseconds and attempt the call again. Currently, the kernel doesn't
> know about this, and respecting it fixes some PCI failures when the
> hypervisor is busy.
> 
> The delay is capped at 0.2 seconds.
> 
> Signed-off-by: Russell Currey 

Forgot to mention this patch should go to stable, 3.10+
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/2] powerpc/pagetable: Add option to dump the linux pagetables

2016-03-22 Thread kbuild test robot
Hi Rashmica,

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.5 next-20160322]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improving the system]

url:
https://github.com/0day-ci/linux/commits/Rashmica-Gupta/powerpc-pagetable-Add-option-to-dump-the-linux-pagetables/20160322-060934
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allyesconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

>> arch/powerpc/mm/dump_linuxpagetables.c:148:11: error: '_PAGE_COMBO' 
>> undeclared here (not in a function)
  .mask = _PAGE_COMBO,
  ^

vim +/_PAGE_COMBO +148 arch/powerpc/mm/dump_linuxpagetables.c

   142  .clear  = "",
   143  }, {
   144  .mask   = _PAGE_BUSY,
   145  .val= _PAGE_BUSY,
   146  .set= "busy",
   147  }, {
 > 148  .mask   = _PAGE_COMBO,
   149  .val= _PAGE_COMBO,
   150  .set= "combo",
   151  }, {

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] arch: powerpc: mm: fixed spelling error.

2016-03-22 Thread Andrew Donnellan

Hi Alok

Thanks for wanting to contribute to the Linux kernel!

On 22/03/16 20:43, mistryalok wrote:

Fixed spelling error.
Signed-off-by: Alok Mistry 
---
/*
-* No CPU has hugepages but lacks no execute, so we
+* No CPU has huge pages but lacks no execute, so we
 * don't need to worry about that case
 */


Within the kernel, we have a lot of internal terminology. If you grep 
through the kernel source code, you'll find that in both the 
documentation and the code, we do use the term "hugepage". We also use 
"huge page", but "hugepage" is more common. Therefore I wouldn't call it 
a spelling error, just an internal term :)



Andrew

--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 next] cxl: Allow initialization on timebase sync failures

2016-03-22 Thread Ian Munsie
Acked-by: Ian Munsie 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] kvm-pr: manage single-step mode

2016-03-22 Thread Laurent Vivier
Until now, when we connect gdb to the QEMU gdb-server, the
single-step mode is not managed.

This patch adds this, only for kvm-pr:

If KVM_GUESTDBG_SINGLESTEP is set, we enable single-step trace bit in the
MSR (MSR_SE) just before the __kvmppc_vcpu_run(), and disable it just after.
In kvmppc_handle_exit_pr, instead of routing the interrupt to
the guest, we return to host, with KVM_EXIT_DEBUG reason.

Signed-off-by: Laurent Vivier 
---
 arch/powerpc/kvm/book3s_pr.c | 31 +--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 95bceca..e6896f4 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -882,6 +882,24 @@ void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
 }
 #endif
 
+static void kvmppc_setup_debug(struct kvm_vcpu *vcpu)
+{
+   if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+   u64 msr = kvmppc_get_msr(vcpu);
+
+   kvmppc_set_msr(vcpu, msr | MSR_SE);
+   }
+}
+
+static void kvmppc_clear_debug(struct kvm_vcpu *vcpu)
+{
+   if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+   u64 msr = kvmppc_get_msr(vcpu);
+
+   kvmppc_set_msr(vcpu, msr & ~MSR_SE);
+   }
+}
+
 int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
  unsigned int exit_nr)
 {
@@ -1208,8 +1226,13 @@ program_interrupt:
 #endif
case BOOK3S_INTERRUPT_MACHINE_CHECK:
case BOOK3S_INTERRUPT_TRACE:
-   kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
-   r = RESUME_GUEST;
+   if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+   run->exit_reason = KVM_EXIT_DEBUG;
+   r = RESUME_HOST;
+   } else {
+   kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+   r = RESUME_GUEST;
+   }
break;
default:
{
@@ -1479,6 +1502,8 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, 
struct kvm_vcpu *vcpu)
goto out;
}
 
+   kvmppc_setup_debug(vcpu);
+
/*
 * Interrupts could be timers for the guest which we have to inject
 * again, so let's postpone them until we're in the guest and if we
@@ -1501,6 +1526,8 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, 
struct kvm_vcpu *vcpu)
 
ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
+   kvmppc_clear_debug(vcpu);
+
/* No need for kvm_guest_exit. It's done in handle_exit.
   We also get here with interrupts enabled. */
 
-- 
2.5.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

RE: [PATCH 0/7] PAMU driver update

2016-03-22 Thread Codrin Constantin Ciubotariu
Hi,

Does anyone have any comments on this patch set? Please share your thoughts.

Thanks and best regards,
Codrin

> -----Original Message-----
> From: Codrin Ciubotariu [mailto:codrin.ciubota...@nxp.com]
> Sent: Monday, 07 March, 2016 5:34 PM
> To: io...@lists.linux-foundation.org
> Cc: scottw...@freescale.com; varun.se...@freescale.com; linuxppc-
> d...@lists.ozlabs.org; Codrin Constantin Ciubotariu
> Subject: [PATCH 0/7] PAMU driver update
> 
> This patchset addresses a few issues found on PAMU IOMMU and small changes to
> enable power management and to support the
> L3 cache controller on some newer boards.
> 
> The series starts with a clean-up patch, followed by two errata fixes: 
> A-007907
> and A-005982. It continues with two fixes for PCIe support. The last two 
> patches
> add support for power management and compatible strings for new L3 controller
> device-tree nodes.
> 
> Codrin Ciubotariu (2):
>   iommu/fsl: Fix most checkpatch warnings and typos
>   iommu/fsl: Work around erratum A-007907
> 
> Varun Sethi (5):
>   iommu/fsl: Enable OMT cache, before invalidating PAACT and SPAACT
> cache
>   iommu/fsl: Factor out PCI specific code
>   iommu/fsl: Enable default DMA window for PCIe devices once detached
> from domain
>   iommu/fsl: PAMU power management support
>   iommu/fsl: Added cache controller compatible strings for SOCs
> 
>  drivers/iommu/fsl_pamu.c| 322 
> 
>  drivers/iommu/fsl_pamu.h|  30 ++--
>  drivers/iommu/fsl_pamu_domain.c | 160 +---
>  drivers/iommu/fsl_pamu_domain.h |   2 +-
>  4 files changed, 381 insertions(+), 133 deletions(-)
> 
> --
> 1.9.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v13] cpufreq: powernv: Add sysfs attributes to show throttle stats

2016-03-22 Thread Viresh Kumar
On 22-03-16, 18:57, Shilpasri G Bhat wrote:
> Create sysfs attributes to export throttle information in
> /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats directory. The
> newly added sysfs files are as follows:
> 
> 1)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/turbo_stat
> 2)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/sub-turbo_stat
> 3)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/unthrottle
> 4)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/powercap
> 5)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overtemp
> 6)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/supply_fault
> 7)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overcurrent
> 8)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/occ_reset
> 
> Detailed explanation of each attribute is added to
> Documentation/ABI/testing/sysfs-devices-system-cpu
> 
> CC: linux-...@vger.kernel.org
> Signed-off-by: Shilpasri G Bhat 
> ---
> Changes from v12:
> - Removed (void *) typecast
> - Move the definition of ret inside the 'if' block

Sorry for 12 versions :)

Acked-by: Viresh Kumar 

-- 
viresh
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v13] cpufreq: powernv: Add sysfs attributes to show throttle stats

2016-03-22 Thread Shilpasri G Bhat
Create sysfs attributes to export throttle information in
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats directory. The
newly added sysfs files are as follows:

1)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/turbo_stat
2)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/sub_turbo_stat
3)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/unthrottle
4)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/powercap
5)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overtemp
6)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/supply_fault
7)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overcurrent
8)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/occ_reset

Detailed explanation of each attribute is added to
Documentation/ABI/testing/sysfs-devices-system-cpu

CC: linux-...@vger.kernel.org
Signed-off-by: Shilpasri G Bhat 
---
Changes from v12:
- Removed (void *) typecast
- Move the definition of ret inside the 'if' block

Changes from v11:
- Removed '*create_throttle_sysfs' 
- policy->driver_data is used instead to maintain the flag for
  one-time creation of throttle sysfs files.

Changes from v10:
- Removed policy_notifiers to use driver->init() instead to create sysfs
- sysfs attributes are removed by kobject_put(policy->kobj)
- Rebased on top of
  http://lkml.iu.edu/hypermail/linux/kernel/1603.2/02268.html

Changes from v9:
- Modified documentation.
- s/throttle_nominal/throttle_sub_turbo

Changes from v8:
- Moved the sysfs attributes from cpu/cpufreq/chipX to 
cpuX/cpufreq/throttle_stats
- Adhering to one-value-per-file, replace throttle_table with multiple
  sysfs files.
- Using CPUFREQ_POLICY_NOTIFIER to add/remove attribute_group.

 Documentation/ABI/testing/sysfs-devices-system-cpu | 69 
 drivers/cpufreq/powernv-cpufreq.c  | 74 +-
 2 files changed, 141 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu 
b/Documentation/ABI/testing/sysfs-devices-system-cpu
index b683e8e..1650133 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -271,3 +271,72 @@ Description:   Parameters for the CPU cache attributes
- WriteBack: data is written only to the cache line and
 the modified cache line is written to main
 memory only when it is replaced
+
+What:  /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/turbo_stat
+   
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/sub_turbo_stat
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/unthrottle
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/powercap
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overtemp
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/supply_fault
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overcurrent
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/occ_reset
+Date:  March 2016
+Contact:   Linux kernel mailing list 
+   Linux for PowerPC mailing list 
+Description:   POWERNV CPUFreq driver's frequency throttle stats directory and
+   attributes
+
+   'cpuX/cpufreq/throttle_stats' directory contains the CPU 
frequency
+   throttle stat attributes for the chip. The throttle stats of a 
cpu
+   is common across all the cpus belonging to a chip. Below are the
+   throttle attributes exported in the 'throttle_stats' directory:
+
+   - turbo_stat : This file gives the total number of times the max
+   frequency is throttled to lower frequency in turbo (at and above
+   nominal frequency) range of frequencies.
+
+   - sub_turbo_stat : This file gives the total number of times the
+   max frequency is throttled to lower frequency in sub-turbo(below
+   nominal frequency) range of frequencies.
+
+   - unthrottle : This file gives the total number of times the max
+   frequency is unthrottled after being throttled.
+
+   - powercap : This file gives the total number of times the max
+   frequency is throttled due to 'Power Capping'.
+
+   - overtemp : This file gives the total number of times the max
+   frequency is throttled due to 'CPU Over Temperature'.
+
+   - supply_fault : This file gives the total number of times the
+   max frequency is throttled due to 'Power Supply Failure'.
+
+   - overcurrent : This file gives the total number of times the
+   max frequency is throttled due to 

Re: [PATCH kernel 08/10] powerpc/powernv/npu: Add NPU devices to IOMMU group

2016-03-22 Thread Benjamin Herrenschmidt
On Tue, 2016-03-22 at 12:48 +1100, Alexey Kardashevskiy wrote:
> 
> I suppose GPU from guest1 could trigger DMA from NPU to guest2 memory. 
> Which puts a constraint on management tools not to pass NPUs without their 
> GPU counterparts.

Management tools will not be taught such constraints. The plan always
was to make sure they are in the same group. So they should be.

> The host can be affected as bypass is not disabled on NPU when GPU is taken 
> by VFIO, I'll fix this.
>
> >> If I put them to the same group as GPUs, I would have to have
> >> IODA2-linked-to-NPU bridge type with different iommu_table_group_ops  or
> >> have multiple hacks everywhere in IODA2 to enable/disable bypass,
> >> etc.
> >
> > Well.. I suspect it would mean no longer having a 1:1 correspondence
> > between user-visible IOMMU groups and the internal iommu_table.
> 
> Right.

They can share the table too ...

> Right now each GPU is sitting on a separate PHB and has its own PE. And all 
> NPUs sit on a separate PHB and each couple of NPUs (2 links of the same 
> GPU) gets a PE.
> 
> So we have separate PEs (struct pnv_ioda_pe) already, each has its own 
> iommu_table_group_ops with all these VFIO IOMMU callbacks. So to make this 
> all appear as one IOMMU group in sysfs, I will need to stop embedding 
> iommu_table_group into pnv_ioda_pe but make it a pointer with reference 
> counting, etc. Quite a massive change...

Or you just put a quirk flag of some sort and a pointer to the "linked"
PE... sometimes that's a lot easier than lifting up the whole
infrastructure.
> 
> 
> 
>  ---
>    arch/powerpc/platf
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v12] cpufreq: powernv: Add sysfs attributes to show throttle stats

2016-03-22 Thread Viresh Kumar
I really wanted to Ack this time, but you know I am nitpicking again :(

On 22-03-16, 16:18, Shilpasri G Bhat wrote:
>  static int powernv_cpufreq_cpu_init(struct cpufreq_policy *policy)
>  {
> - int base, i;
> + int base, i, ret;
>  
>   base = cpu_first_thread_sibling(policy->cpu);
>  
>   for (i = 0; i < threads_per_core; i++)
>   cpumask_set_cpu(base + i, policy->cpus);
>  
> + if (!policy->driver_data) {

Declare ret here, as it is going to be used only within the if block.

> + ret = sysfs_create_group(&policy->kobj, &throttle_attr_grp);
> + if (ret) {
> + pr_info("Failed to create throttle stats directory for 
> cpu %d\n",
> + policy->cpu);
> + return ret;
> + }
> + /*
> +  * policy->driver_data is used as a flag for one-time
> +  * creation of throttle sysfs files.
> +  */
> + policy->driver_data = (void *)policy;

This is far better than the ugly solution I suggested on our private chat :),
but you should drop the (void *) typecast. It's not required.

-- 
viresh
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] Added a 5ms wait after a msi-irq is masked

2016-03-22 Thread Philippe Bergheaud

Sorry, I've sent this to the wrong list.

Philippe

Philippe Bergheaud wrote:

From: Vaibhav Jain 

Adds a 5ms wait to phb3_msi_set_xive after the interrupt is masked so
that the kernel delays cleanup until any in-flight irq has been
handled. The value 5ms is the worst-case time needed by an irq to be
presented to the host after it is generated.

Signed-off-by: Vaibhav Jain 
---
This patch requires the following patches:
https://patchwork.ozlabs.org/patch/581764/
https://patchwork.ozlabs.org/patch/581765/

 hw/phb3.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/phb3.c b/hw/phb3.c
index fbdcb9e..e5d49b2 100644
--- a/hw/phb3.c
+++ b/hw/phb3.c
@@ -1751,6 +1751,8 @@ static int64_t phb3_msi_set_xive(void *data,
PHB_IVC_UPDATE_ENABLE_Q |
PHB_IVC_UPDATE_ENABLE_GEN;
out_be64(p->regs + PHB_IVC_UPDATE, ivc);
+   /* wait for 5ms before signalling the interrupt is masked */
+   time_wait_ms(5);
}
 
 	return OPAL_SUCCESS;


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v12] cpufreq: powernv: Add sysfs attributes to show throttle stats

2016-03-22 Thread Shilpasri G Bhat
Create sysfs attributes to export throttle information in
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats directory. The
newly added sysfs files are as follows:

1)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/turbo_stat
2)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/sub_turbo_stat
3)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/unthrottle
4)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/powercap
5)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overtemp
6)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/supply_fault
7)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overcurrent
8)/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/occ_reset

Detailed explanation of each attribute is added to
Documentation/ABI/testing/sysfs-devices-system-cpu

CC: linux-...@vger.kernel.org
Signed-off-by: Shilpasri G Bhat 
---
Changes from v11:
- Removed '*create_throttle_sysfs' 
- policy->driver_data is used instead to maintain the flag for
  one-time creation of throttle sysfs files.

Changes from v10:
- Removed policy_notifiers to use driver->init() instead to create sysfs
- sysfs attributes are removed by kobject_put(policy->kobj)
- Rebased on top of
  http://lkml.iu.edu/hypermail/linux/kernel/1603.2/02268.html

Changes from v9:
- Modified documentation.
- s/throttle_nominal/throttle_sub_turbo

Changes from v8:
- Moved the sysfs attributes from cpu/cpufreq/chipX to 
cpuX/cpufreq/throttle_stats
- Adhering to one-value-per-file, replace throttle_table with multiple
  sysfs files.
- Using CPUFREQ_POLICY_NOTIFIER to add/remove attribute_group.

 Documentation/ABI/testing/sysfs-devices-system-cpu | 69 
 drivers/cpufreq/powernv-cpufreq.c  | 74 +-
 2 files changed, 140 insertions(+), 3 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu 
b/Documentation/ABI/testing/sysfs-devices-system-cpu
index b683e8e..1650133 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -271,3 +271,72 @@ Description:   Parameters for the CPU cache attributes
- WriteBack: data is written only to the cache line and
 the modified cache line is written to main
 memory only when it is replaced
+
+What:  /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/turbo_stat
+   
/sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/sub_turbo_stat
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/unthrottle
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/powercap
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overtemp
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/supply_fault
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/overcurrent
+   /sys/devices/system/cpu/cpuX/cpufreq/throttle_stats/occ_reset
+Date:  March 2016
+Contact:   Linux kernel mailing list 
+   Linux for PowerPC mailing list 
+Description:   POWERNV CPUFreq driver's frequency throttle stats directory and
+   attributes
+
+   'cpuX/cpufreq/throttle_stats' directory contains the CPU 
frequency
+   throttle stat attributes for the chip. The throttle stats of a 
cpu
+   is common across all the cpus belonging to a chip. Below are the
+   throttle attributes exported in the 'throttle_stats' directory:
+
+   - turbo_stat : This file gives the total number of times the max
+   frequency is throttled to lower frequency in turbo (at and above
+   nominal frequency) range of frequencies.
+
+   - sub_turbo_stat : This file gives the total number of times the
+   max frequency is throttled to lower frequency in sub-turbo(below
+   nominal frequency) range of frequencies.
+
+   - unthrottle : This file gives the total number of times the max
+   frequency is unthrottled after being throttled.
+
+   - powercap : This file gives the total number of times the max
+   frequency is throttled due to 'Power Capping'.
+
+   - overtemp : This file gives the total number of times the max
+   frequency is throttled due to 'CPU Over Temperature'.
+
+   - supply_fault : This file gives the total number of times the
+   max frequency is throttled due to 'Power Supply Failure'.
+
+   - overcurrent : This file gives the total number of times the
+   max frequency is throttled due to 'Overcurrent'.
+
+   - occ_reset : This file gives the total number of times the max
+   

[PATCH] Added a 5ms wait after a msi-irq is masked

2016-03-22 Thread Philippe Bergheaud
From: Vaibhav Jain 

Adds a 5ms wait to phb3_msi_set_xive after the interrupt is masked so
that the kernel delays cleanup until any in-flight irq has been
handled. The value 5ms is the worst-case time needed by an irq to be
presented to the host after it is generated.

Signed-off-by: Vaibhav Jain 
---
This patch requires the following patches:
https://patchwork.ozlabs.org/patch/581764/
https://patchwork.ozlabs.org/patch/581765/

 hw/phb3.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/phb3.c b/hw/phb3.c
index fbdcb9e..e5d49b2 100644
--- a/hw/phb3.c
+++ b/hw/phb3.c
@@ -1751,6 +1751,8 @@ static int64_t phb3_msi_set_xive(void *data,
PHB_IVC_UPDATE_ENABLE_Q |
PHB_IVC_UPDATE_ENABLE_GEN;
out_be64(p->regs + PHB_IVC_UPDATE, ivc);
+   /* wait for 5ms before signalling the interrupt is masked */
+   time_wait_ms(5);
}
 
return OPAL_SUCCESS;
-- 
2.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] arch: powerpc: mm: fixed spelling error.

2016-03-22 Thread mistryalok
Fixed spelling error.
Signed-off-by: Alok Mistry 
---
 arch/powerpc/mm/hash64_64k.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index b2d659c..fe89a6d 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -91,7 +91,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, 
unsigned long vsid,
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
 
/*
-* No CPU has hugepages but lacks no execute, so we
+* No CPU has huge pages but lacks no execute, so we
 * don't need to worry about that case
 */
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
-- 
1.9.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] kvm-pr: manage illegal instructions

2016-03-22 Thread Laurent Vivier
Hi,

as Paolo has merged the test into kvm-unit-tests, this patch (and
original bug) can be now tested with it.

git://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git

at least:

be9b007 powerpc: add test to check invalid instruction trap

Run this with KVM-PR and check your dmesg:

qemu-system-ppc64 -machine pseries,accel=kvm \
  -bios powerpc/boot_rom.bin \
  -display none -serial stdio \
  -kernel powerpc/emulator.elf -smp 1

Laurent

On 15/03/2016 21:18, Laurent Vivier wrote:
> While writing some instruction tests for kvm-unit-tests for powerpc,
> I've found that illegal instructions are not managed correctly with kvm-pr,
> while it is fine with kvm-hv.
> 
> When an illegal instruction (like ".long 0") is processed by kvm-pr,
> the kernel logs are filled with:
> 
>  Couldn't emulate instruction 0x00000000 (op 0 xop 0)
>  kvmppc_handle_exit_pr: emulation at 700 failed (00000000)
> 
> While the exception handler receives an interrupt for each instruction
> executed after the illegal instruction.
> 
> Signed-off-by: Laurent Vivier 
> ---
>  arch/powerpc/kvm/book3s_emulate.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_emulate.c 
> b/arch/powerpc/kvm/book3s_emulate.c
> index 2afdb9c..4ee969d 100644
> --- a/arch/powerpc/kvm/book3s_emulate.c
> +++ b/arch/powerpc/kvm/book3s_emulate.c
> @@ -99,7 +99,6 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct 
> kvm_vcpu *vcpu,
>  
>   switch (get_op(inst)) {
>   case 0:
> - emulated = EMULATE_FAIL;
>   if ((kvmppc_get_msr(vcpu) & MSR_LE) &&
>   (inst == swab32(inst_sc))) {
>   /*
> @@ -112,6 +111,9 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct 
> kvm_vcpu *vcpu,
>   kvmppc_set_gpr(vcpu, 3, EV_UNIMPLEMENTED);
>   kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
>   emulated = EMULATE_DONE;
> + } else {
> + kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
> + emulated = EMULATE_AGAIN;
>   }
>   break;
>   case 19:
> 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc:dts:p2041rdb: enable gpio-expander

2016-03-22 Thread Nora Björklund
On Mon, 2016-03-21 at 12:58 -0500, Scott Wood wrote:
> On Mon, 2016-03-21 at 11:48 +0100, Nora Björklund wrote:
> > 
> > Enable the gpio-expander pca9672 on p2041rdb. The expander
> > has been present on the p2041rdb all along, however not in
> > the device tree.
> > 
> > Signed-off-by: Nora Björklund 
> > ---
> >  arch/powerpc/boot/dts/fsl/p2041rdb.dts | 7 +++
> >  1 file changed, 7 insertions(+)
> > 
> > diff --git a/arch/powerpc/boot/dts/fsl/p2041rdb.dts
> > b/arch/powerpc/boot/dts/fsl/p2041rdb.dts
> > index e9bd894..b1ee561 100644
> > --- a/arch/powerpc/boot/dts/fsl/p2041rdb.dts
> > +++ b/arch/powerpc/boot/dts/fsl/p2041rdb.dts
> > @@ -134,6 +134,13 @@
> >     };
> >     };
> >  
> > +   i2c@119100 {
> > +   gpio@22 {
> > +   compatible = "pcf8575";
> > +   reg = <0x22>;
> > +   };
> > +   };
> The compatible should be "nxp,pcf8575" and it needs the gpio-
> controller and
> #gpio-cells properties.
Sorry, the compatible should actually be "nxp,pca9672"..
> 
> Is there an interrupt line connected to it?
The PCA9672 has an interrupt line, and it should be connected to irqpin0 if
the naming in the p2041rdb UG follows the same pattern as the
kernel.

Will try to test and update during the week.

/Nora
> :q

> See Documentation/devicetree/bindings/gpio/gpio-pcf857x.txt
> 
> -Scott
> 
> ___
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 05/14] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED

2016-03-22 Thread Michael Neuling
On Mon, 2016-03-07 at 19:09 +0530, Aneesh Kumar K.V wrote:
> _PAGE_PRIVILEGED means the page can be accessed only by kernel. This is done
> to keep pte bits similar to PowerISA 3.0 radix PTE format. User
> pages are now marked by clearing the _PAGE_PRIVILEGED bit.
> 
> Previously we allowed kernel to have a privileged page
> in the lower address range(USER_REGION). With this patch such access
> is denied.
> 
> We also prevent a kernel access to a non-privileged page in
> higher address range (ie, REGION_ID != 0). Both of the above access
> scenarios should never happen.

A few comments below.  I didn't find any issues, just some potential
cleanups.

Mikey

> Signed-off-by: Aneesh Kumar K.V 
> ---
>  arch/powerpc/include/asm/book3s/64/hash.h| 34 
> ++--
>  arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++-
>  arch/powerpc/mm/hash64_4k.c  |  2 +-
>  arch/powerpc/mm/hash64_64k.c |  4 ++--
>  arch/powerpc/mm/hash_utils_64.c  | 17 --
>  arch/powerpc/mm/hugepage-hash64.c|  2 +-
>  arch/powerpc/mm/hugetlbpage-hash64.c |  3 ++-
>  arch/powerpc/mm/hugetlbpage.c|  2 +-
>  arch/powerpc/mm/pgtable.c| 15 ++--
>  arch/powerpc/mm/pgtable_64.c | 15 +---
>  arch/powerpc/platforms/cell/spufs/fault.c|  2 +-
>  drivers/misc/cxl/fault.c |  5 ++--
>  12 files changed, 80 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h 
> b/arch/powerpc/include/asm/book3s/64/hash.h
> index f092d83fa623..fbefbaa92736 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -20,7 +20,7 @@
>  #define _PAGE_READ   0x4 /* read access allowed */
>  #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE)
>  #define _PAGE_RWX(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
> -#define _PAGE_USER   0x8 /* page may be accessed by userspace */
> +#define _PAGE_PRIVILEGED 0x8 /* page can only be access by kernel */

/* page can only be accessed by kernel */

Or just

/* kernel access only */

>  #define _PAGE_GUARDED0x00010 /* G: guarded (side-effect) 
> page */
>  /* M (memory coherence) is always set in the HPTE, so we don't need it here 
> */
>  #define _PAGE_COHERENT   0x0
> @@ -114,10 +114,13 @@
>  #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
>  #endif /* CONFIG_PPC_MM_SLICES */
>  
> -/* No separate kernel read-only */
> -#define _PAGE_KERNEL_RW  (_PAGE_RW | _PAGE_DIRTY) /* user access 
> blocked by key */
> +/*
> + * No separate kernel read-only, user access blocked by key
> + */
> +#define _PAGE_KERNEL_RW  (_PAGE_PRIVILEGED | _PAGE_RW | 
> _PAGE_DIRTY)
>  #define _PAGE_KERNEL_RO   _PAGE_KERNEL_RW
> -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
> +#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \
> +  _PAGE_RW | _PAGE_EXEC)
>  
>  /* Strong Access Ordering */
>  #define _PAGE_SAO(_PAGE_WRITETHRU | _PAGE_NO_CACHE | 
> _PAGE_COHERENT)
> @@ -149,7 +152,7 @@
>   */
>  #define PAGE_PROT_BITS   (_PAGE_GUARDED | _PAGE_COHERENT | 
> _PAGE_NO_CACHE | \
>_PAGE_WRITETHRU | _PAGE_4K_PFN | \
> -  _PAGE_USER | _PAGE_ACCESSED |  _PAGE_READ |\
> +  _PAGE_PRIVILEGED | _PAGE_ACCESSED |  _PAGE_READ |\
>_PAGE_WRITE |  _PAGE_DIRTY | _PAGE_EXEC | \
>_PAGE_SOFT_DIRTY)
>  /*
> @@ -171,16 +174,13 @@
>   *
>   * Note due to the way vm flags are laid out, the bits are XWR
>   */
> -#define PAGE_NONE__pgprot(_PAGE_BASE)
> -#define PAGE_SHARED  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
> -#define PAGE_SHARED_X__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
> -  _PAGE_EXEC)
> -#define PAGE_COPY__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
> -#define PAGE_COPY_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
> -  _PAGE_EXEC)
> -#define PAGE_READONLY__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
> -#define PAGE_READONLY_X  __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
> -  _PAGE_EXEC)
> +#define PAGE_NONE__pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
> +#define PAGE_SHARED  __pgprot(_PAGE_BASE | _PAGE_RW)
> +#define PAGE_SHARED_X__pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
> +#define PAGE_COPY__pgprot(_PAGE_BASE | _PAGE_READ)
> +#define PAGE_COPY_X  __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
> +#define PAGE_READONLY__pgprot(_PAGE_BASE | _PAGE_READ)
> +#define PAGE_READONLY_X  __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)

Eyeballing these, they seemed to have been converted ok

>  
>  #define __P000   PAGE_NONE
>  #define