[PATCH v11 9/9] powerpc: clean stack pointers naming

2018-12-09 Thread Christophe Leroy
Some stack pointers used to also be thread_info pointers
and were called tp. Now that they are only stack pointers,
rename them sp.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/irq.c  | 17 +++--
 arch/powerpc/kernel/setup_64.c | 20 ++--
 2 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 62cfccf4af89..754f0efc507b 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -659,21 +659,21 @@ void __do_irq(struct pt_regs *regs)
 void do_IRQ(struct pt_regs *regs)
 {
struct pt_regs *old_regs = set_irq_regs(regs);
-   void *curtp, *irqtp, *sirqtp;
+   void *cursp, *irqsp, *sirqsp;
 
/* Switch to the irq stack to handle this */
-   curtp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
-   irqtp = hardirq_ctx[raw_smp_processor_id()];
-   sirqtp = softirq_ctx[raw_smp_processor_id()];
+   cursp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+   irqsp = hardirq_ctx[raw_smp_processor_id()];
+   sirqsp = softirq_ctx[raw_smp_processor_id()];
 
/* Already there ? */
-   if (unlikely(curtp == irqtp || curtp == sirqtp)) {
+   if (unlikely(cursp == irqsp || cursp == sirqsp)) {
__do_irq(regs);
set_irq_regs(old_regs);
return;
}
/* Switch stack and call */
-   call_do_irq(regs, irqtp);
+   call_do_irq(regs, irqsp);
 
set_irq_regs(old_regs);
 }
@@ -732,10 +732,7 @@ void irq_ctx_init(void)
 
 void do_softirq_own_stack(void)
 {
-   void *irqtp;
-
-   irqtp = softirq_ctx[smp_processor_id()];
-   call_do_softirq(irqtp);
+   call_do_softirq(softirq_ctx[smp_processor_id()]);
 }
 
 irq_hw_number_t virq_to_hw(unsigned int virq)
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 0b227d0891ec..49765ccbc8c0 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -718,22 +718,22 @@ void __init emergency_stack_init(void)
limit = min(ppc64_bolted_size(), ppc64_rma_size);
 
for_each_possible_cpu(i) {
-   void *ti;
+   void *sp;
 
-   ti = alloc_stack(limit, i);
-   memset(ti, 0, THREAD_SIZE);
-   paca_ptrs[i]->emergency_sp = ti + THREAD_SIZE;
+   sp = alloc_stack(limit, i);
+   memset(sp, 0, THREAD_SIZE);
+   paca_ptrs[i]->emergency_sp = sp + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
-   ti = alloc_stack(limit, i);
-   memset(ti, 0, THREAD_SIZE);
-   paca_ptrs[i]->nmi_emergency_sp = ti + THREAD_SIZE;
+   sp = alloc_stack(limit, i);
+   memset(sp, 0, THREAD_SIZE);
+   paca_ptrs[i]->nmi_emergency_sp = sp + THREAD_SIZE;
 
/* emergency stack for machine check exception handling. */
-   ti = alloc_stack(limit, i);
-   memset(ti, 0, THREAD_SIZE);
-   paca_ptrs[i]->mc_emergency_sp = ti + THREAD_SIZE;
+   sp = alloc_stack(limit, i);
+   memset(sp, 0, THREAD_SIZE);
+   paca_ptrs[i]->mc_emergency_sp = sp + THREAD_SIZE;
 #endif
}
 }
-- 
2.13.3



[PATCH v11 8/9] powerpc/64: Remove CURRENT_THREAD_INFO

2018-12-09 Thread Christophe Leroy
Now that current_thread_info is located at the beginning of 'current'
task struct, CURRENT_THREAD_INFO macro is not really needed any more.

This patch replaces it by loads of the value at PACACURRENT(r13).

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/exception-64s.h   |  4 ++--
 arch/powerpc/include/asm/thread_info.h |  4 
 arch/powerpc/kernel/entry_64.S | 10 +-
 arch/powerpc/kernel/exceptions-64e.S   |  2 +-
 arch/powerpc/kernel/exceptions-64s.S   |  2 +-
 arch/powerpc/kernel/idle_book3e.S  |  2 +-
 arch/powerpc/kernel/idle_power4.S  |  2 +-
 arch/powerpc/kernel/trace/ftrace_64_mprofile.S |  6 +++---
 8 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 3b4767ed3ec5..dd6a5ae7a769 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -671,7 +671,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
 #define RUNLATCH_ON\
 BEGIN_FTR_SECTION  \
-   CURRENT_THREAD_INFO(r3, r1);\
+   ld  r3, PACACURRENT(r13);   \
ld  r4,TI_LOCAL_FLAGS(r3);  \
andi.   r0,r4,_TLF_RUNLATCH;\
beqlppc64_runlatch_on_trampoline;   \
@@ -721,7 +721,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
 #ifdef CONFIG_PPC_970_NAP
 #define FINISH_NAP \
 BEGIN_FTR_SECTION  \
-   CURRENT_THREAD_INFO(r11, r1);   \
+   ld  r11, PACACURRENT(r13);  \
ld  r9,TI_LOCAL_FLAGS(r11); \
andi.   r10,r9,_TLF_NAPPING;\
bnelpower4_fixup_nap;   \
diff --git a/arch/powerpc/include/asm/thread_info.h 
b/arch/powerpc/include/asm/thread_info.h
index c959b8d66cac..8e1d0195ac36 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -17,10 +17,6 @@
 
 #define THREAD_SIZE(1 << THREAD_SHIFT)
 
-#ifdef CONFIG_PPC64
-#define CURRENT_THREAD_INFO(dest, sp)  stringify_in_c(ld dest, 
PACACURRENT(r13))
-#endif
-
 #ifndef __ASSEMBLY__
 #include 
 #include 
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 03cbf409c3f8..b017bd3da1ed 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -158,7 +158,7 @@ system_call:/* label this so stack 
traces look sane */
li  r10,IRQS_ENABLED
std r10,SOFTE(r1)
 
-   CURRENT_THREAD_INFO(r11, r1)
+   ld  r11, PACACURRENT(r13)
ld  r10,TI_FLAGS(r11)
andi.   r11,r10,_TIF_SYSCALL_DOTRACE
bne .Lsyscall_dotrace   /* does not return */
@@ -205,7 +205,7 @@ system_call:/* label this so stack 
traces look sane */
ld  r3,RESULT(r1)
 #endif
 
-   CURRENT_THREAD_INFO(r12, r1)
+   ld  r12, PACACURRENT(r13)
 
ld  r8,_MSR(r1)
 #ifdef CONFIG_PPC_BOOK3S
@@ -336,7 +336,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
/* Repopulate r9 and r10 for the syscall path */
addir9,r1,STACK_FRAME_OVERHEAD
-   CURRENT_THREAD_INFO(r10, r1)
+   ld  r10, PACACURRENT(r13)
ld  r10,TI_FLAGS(r10)
 
cmpldi  r0,NR_syscalls
@@ -734,7 +734,7 @@ _GLOBAL(ret_from_except_lite)
mtmsrd  r10,1 /* Update machine state */
 #endif /* CONFIG_PPC_BOOK3E */
 
-   CURRENT_THREAD_INFO(r9, r1)
+   ld  r9, PACACURRENT(r13)
ld  r3,_MSR(r1)
 #ifdef CONFIG_PPC_BOOK3E
ld  r10,PACACURRENT(r13)
@@ -848,7 +848,7 @@ resume_kernel:
 1: bl  preempt_schedule_irq
 
/* Re-test flags and eventually loop */
-   CURRENT_THREAD_INFO(r9, r1)
+   ld  r9, PACACURRENT(r13)
ld  r4,TI_FLAGS(r9)
andi.   r0,r4,_TIF_NEED_RESCHED
bne 1b
diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 231d066b4a3d..dfafcd0af009 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -469,7 +469,7 @@ exc_##n##_bad_stack:
\
  * interrupts happen before the wait instruction.
  */
 #define CHECK_NAPPING()
\
-   CURRENT_THREAD_INFO(r11, r1);   \
+   ld  r11, PACACURRENT(r13);  \
ld  r10,TI_LOCAL_FLAGS(r11);\
andi.   r9,r10,_TLF_NAPPING;\
beq+1f; \
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 89d32bb79d5e..1cbe1a78df57 100644

[PATCH v11 7/9] powerpc/32: Remove CURRENT_THREAD_INFO and rename TI_CPU

2018-12-09 Thread Christophe Leroy
Now that thread_info is similar to task_struct, its address is in r2
so the CURRENT_THREAD_INFO() macro is useless. This patch removes it.

At the same time, as the 'cpu' field is no longer in thread_info,
this patch renames TI_CPU to TASK_CPU.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/Makefile  |  2 +-
 arch/powerpc/include/asm/thread_info.h |  2 --
 arch/powerpc/kernel/asm-offsets.c  |  2 +-
 arch/powerpc/kernel/entry_32.S | 43 --
 arch/powerpc/kernel/epapr_hcalls.S |  5 ++--
 arch/powerpc/kernel/head_fsl_booke.S   |  5 ++--
 arch/powerpc/kernel/idle_6xx.S |  8 +++
 arch/powerpc/kernel/idle_e500.S|  8 +++
 arch/powerpc/kernel/misc_32.S  |  3 +--
 arch/powerpc/mm/hash_low_32.S  | 14 ---
 arch/powerpc/sysdev/6xx-suspend.S  |  5 ++--
 11 files changed, 35 insertions(+), 62 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 1135cb0b7e48..ad2fd4ba916f 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -425,7 +425,7 @@ ifdef CONFIG_SMP
 prepare: task_cpu_prepare
 
 task_cpu_prepare: prepare0
-   $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TI_CPU") 
print $$3;}' include/generated/asm-offsets.h))
+   $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == 
"TASK_CPU") print $$3;}' include/generated/asm-offsets.h))
 endif
 
 # Check toolchain versions:
diff --git a/arch/powerpc/include/asm/thread_info.h 
b/arch/powerpc/include/asm/thread_info.h
index d91523c2c7d8..c959b8d66cac 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -19,8 +19,6 @@
 
 #ifdef CONFIG_PPC64
 #define CURRENT_THREAD_INFO(dest, sp)  stringify_in_c(ld dest, 
PACACURRENT(r13))
-#else
-#define CURRENT_THREAD_INFO(dest, sp)  stringify_in_c(mr dest, r2)
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 94ac190a0b16..03439785c2ea 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -96,7 +96,7 @@ int main(void)
 #endif /* CONFIG_PPC64 */
OFFSET(TASK_STACK, task_struct, stack);
 #ifdef CONFIG_SMP
-   OFFSET(TI_CPU, task_struct, cpu);
+   OFFSET(TASK_CPU, task_struct, cpu);
 #endif
 
 #ifdef CONFIG_LIVEPATCH
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index b547bd4168d8..52a061f14c7d 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -168,8 +168,7 @@ transfer_to_handler:
tophys(r11,r11)
addir11,r11,global_dbcr0@l
 #ifdef CONFIG_SMP
-   CURRENT_THREAD_INFO(r9, r1)
-   lwz r9,TI_CPU(r9)
+   lwz r9,TASK_CPU(r2)
slwir9,r9,3
add r11,r11,r9
 #endif
@@ -180,8 +179,7 @@ transfer_to_handler:
stw r12,4(r11)
 #endif
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-   CURRENT_THREAD_INFO(r9, r1)
-   tophys(r9, r9)
+   tophys(r9, r2)
ACCOUNT_CPU_USER_ENTRY(r9, r11, r12)
 #endif
 
@@ -195,8 +193,7 @@ transfer_to_handler:
ble-stack_ovf   /* then the kernel stack overflowed */
 5:
 #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
-   CURRENT_THREAD_INFO(r9, r1)
-   tophys(r9,r9)   /* check local flags */
+   tophys(r9,r2)   /* check local flags */
lwz r12,TI_LOCAL_FLAGS(r9)
mtcrf   0x01,r12
bt- 31-TLF_NAPPING,4f
@@ -345,8 +342,7 @@ _GLOBAL(DoSyscall)
mtmsr   r11
 1:
 #endif /* CONFIG_TRACE_IRQFLAGS */
-   CURRENT_THREAD_INFO(r10, r1)
-   lwz r11,TI_FLAGS(r10)
+   lwz r11,TI_FLAGS(r2)
andi.   r11,r11,_TIF_SYSCALL_DOTRACE
bne-syscall_dotrace
 syscall_dotrace_cont:
@@ -379,13 +375,12 @@ ret_from_syscall:
lwz r3,GPR3(r1)
 #endif
mr  r6,r3
-   CURRENT_THREAD_INFO(r12, r1)
/* disable interrupts so current_thread_info()->flags can't change */
LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
-   lwz r9,TI_FLAGS(r12)
+   lwz r9,TI_FLAGS(r2)
li  r8,-MAX_ERRNO
andi.   
r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne-syscall_exit_work
@@ -432,8 +427,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
andi.   r4,r8,MSR_PR
beq 3f
-   CURRENT_THREAD_INFO(r4, r1)
-   ACCOUNT_CPU_USER_EXIT(r4, r5, r7)
+   ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
 3:
 #endif
lwz r4,_LINK(r1)
@@ -526,7 +520,7 @@ syscall_exit_work:
/* Clear per-syscall TIF flags if any are set.  */
 
li  r11,_TIF_PERSYSCALL_MASK
-   addir12,r12,TI_FLAGS
+   addir12,r2,TI_FLAGS
 3: lwarx   

[PATCH v11 6/9] powerpc: 'current_set' is now a table of task_struct pointers

2018-12-09 Thread Christophe Leroy
The table of pointers 'current_set' has been used for retrieving
the stack and current. They used to be thread_info pointers as
they were pointing to the stack and current was taken from the
'task' field of the thread_info.

The pointers of the 'current_set' table are now both pointers
to task_struct and pointers to thread_info.

As they are used to get current, and the stack pointer is
retrieved from current's stack field, this patch changes
their type to task_struct, and renames secondary_ti to
secondary_current.

Reviewed-by: Nicholas Piggin 
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/asm-prototypes.h |  4 ++--
 arch/powerpc/kernel/head_32.S |  6 +++---
 arch/powerpc/kernel/head_44x.S|  4 ++--
 arch/powerpc/kernel/head_fsl_booke.S  |  4 ++--
 arch/powerpc/kernel/smp.c | 10 --
 5 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h 
b/arch/powerpc/include/asm/asm-prototypes.h
index 6f201b199c02..f5347fbaf03c 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -23,8 +23,8 @@
 #include 
 
 /* SMP */
-extern struct thread_info *current_set[NR_CPUS];
-extern struct thread_info *secondary_ti;
+extern struct task_struct *current_set[NR_CPUS];
+extern struct task_struct *secondary_current;
 void start_secondary(void *unused);
 
 /* kexec */
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 9dd85af46669..43bff4deca3e 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -842,9 +842,9 @@ __secondary_start:
 #endif /* CONFIG_PPC_BOOK3S_32 */
 
/* get current's stack and current */
-   lis r1,secondary_ti@ha
-   tophys(r1,r1)
-   lwz r2,secondary_ti@l(r1)
+   lis r2,secondary_current@ha
+   tophys(r2,r2)
+   lwz r2,secondary_current@l(r2)
tophys(r1,r2)
lwz r1,TASK_STACK(r1)
 
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 2c7e90f36358..48e4de4dfd0c 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -1021,8 +1021,8 @@ _GLOBAL(start_secondary_47x)
/* Now we can get our task struct and real stack pointer */
 
/* Get current's stack and current */
-   lis r1,secondary_ti@ha
-   lwz r2,secondary_ti@l(r1)
+   lis r2,secondary_current@ha
+   lwz r2,secondary_current@l(r2)
lwz r1,TASK_STACK(r2)
 
/* Current stack pointer */
diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index b8a2b789677e..0d27bfff52dd 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -1076,8 +1076,8 @@ __secondary_start:
bl  call_setup_cpu
 
/* get current's stack and current */
-   lis r1,secondary_ti@ha
-   lwz r2,secondary_ti@l(r1)
+   lis r2,secondary_current@ha
+   lwz r2,secondary_current@l(r2)
lwz r1,TASK_STACK(r2)
 
/* stack */
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index aa4517686f90..a41fa8924004 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -76,7 +76,7 @@
 static DEFINE_PER_CPU(int, cpu_state) = { 0 };
 #endif
 
-struct thread_info *secondary_ti;
+struct task_struct *secondary_current;
 bool has_big_cores;
 
 DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
@@ -664,7 +664,7 @@ void smp_send_stop(void)
 }
 #endif /* CONFIG_NMI_IPI */
 
-struct thread_info *current_set[NR_CPUS];
+struct task_struct *current_set[NR_CPUS];
 
 static void smp_store_cpu_info(int id)
 {
@@ -929,7 +929,7 @@ void smp_prepare_boot_cpu(void)
paca_ptrs[boot_cpuid]->__current = current;
 #endif
set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
-   current_set[boot_cpuid] = task_thread_info(current);
+   current_set[boot_cpuid] = current;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -1014,15 +1014,13 @@ static bool secondaries_inhibited(void)
 
 static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
 {
-   struct thread_info *ti = task_thread_info(idle);
-
 #ifdef CONFIG_PPC64
paca_ptrs[cpu]->__current = idle;
paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
 THREAD_SIZE - STACK_FRAME_OVERHEAD;
 #endif
idle->cpu = cpu;
-   secondary_ti = current_set[cpu] = ti;
+   secondary_current = current_set[cpu] = idle;
 }
 
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
-- 
2.13.3



[PATCH v11 5/9] powerpc: regain entire stack space

2018-12-09 Thread Christophe Leroy
thread_info is no longer in the stack, so the entire stack
can now be used.

There is also no risk anymore of corrupting task_cpu(p) with a
stack overflow so the patch removes the test.

When doing this, an explicit test for a NULL stack pointer is
needed in validate_sp() as it is no longer implicitly covered
by the sizeof(thread_info) gap.

In the meantime, with the previous patch none of the pointers to the
stacks are pointers to thread_info any more, so this patch changes them
to void*.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/irq.h   | 10 +-
 arch/powerpc/include/asm/processor.h |  3 +--
 arch/powerpc/kernel/asm-offsets.c|  1 -
 arch/powerpc/kernel/entry_32.S   | 14 --
 arch/powerpc/kernel/irq.c| 19 +--
 arch/powerpc/kernel/misc_32.S|  6 ++
 arch/powerpc/kernel/process.c| 32 +---
 arch/powerpc/kernel/setup_64.c   |  8 
 8 files changed, 38 insertions(+), 55 deletions(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 2efbae8d93be..966ddd4d2414 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -48,9 +48,9 @@ struct pt_regs;
  * Per-cpu stacks for handling critical, debug and machine check
  * level interrupts.
  */
-extern struct thread_info *critirq_ctx[NR_CPUS];
-extern struct thread_info *dbgirq_ctx[NR_CPUS];
-extern struct thread_info *mcheckirq_ctx[NR_CPUS];
+extern void *critirq_ctx[NR_CPUS];
+extern void *dbgirq_ctx[NR_CPUS];
+extern void *mcheckirq_ctx[NR_CPUS];
 extern void exc_lvl_ctx_init(void);
 #else
 #define exc_lvl_ctx_init()
@@ -59,8 +59,8 @@ extern void exc_lvl_ctx_init(void);
 /*
  * Per-cpu stacks for handling hard and soft interrupts.
  */
-extern struct thread_info *hardirq_ctx[NR_CPUS];
-extern struct thread_info *softirq_ctx[NR_CPUS];
+extern void *hardirq_ctx[NR_CPUS];
+extern void *softirq_ctx[NR_CPUS];
 
 extern void irq_ctx_init(void);
 void call_do_softirq(void *sp);
diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 15acb282a876..8179b64871ed 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -325,8 +325,7 @@ struct thread_struct {
 #define ARCH_MIN_TASKALIGN 16
 
 #define INIT_SP(sizeof(init_stack) + (unsigned long) 
_stack)
-#define INIT_SP_LIMIT \
-   (_ALIGN_UP(sizeof(struct thread_info), 16) + (unsigned long)_stack)
+#define INIT_SP_LIMIT  ((unsigned long)_stack)
 
 #ifdef CONFIG_SPE
 #define SPEFSCR_INIT \
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 1fb52206c106..94ac190a0b16 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -92,7 +92,6 @@ int main(void)
DEFINE(SIGSEGV, SIGSEGV);
DEFINE(NMI_MASK, NMI_MASK);
 #else
-   DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
OFFSET(KSP_LIMIT, thread_struct, ksp_limit);
 #endif /* CONFIG_PPC64 */
OFFSET(TASK_STACK, task_struct, stack);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 1f5a76283bd4..b547bd4168d8 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -97,14 +97,11 @@ crit_transfer_to_handler:
mfspr   r0,SPRN_SRR1
stw r0,_SRR1(r11)
 
-   /* set the stack limit to the current stack
-* and set the limit to protect the thread_info
-* struct
-*/
+   /* set the stack limit to the current stack */
mfspr   r8,SPRN_SPRG_THREAD
lwz r0,KSP_LIMIT(r8)
stw r0,SAVED_KSP_LIMIT(r11)
-   rlwimi  r0,r1,0,0,(31-THREAD_SHIFT)
+   rlwinm  r0,r1,0,0,(31 - THREAD_SHIFT)
stw r0,KSP_LIMIT(r8)
/* fall through */
 #endif
@@ -121,14 +118,11 @@ crit_transfer_to_handler:
mfspr   r0,SPRN_SRR1
stw r0,crit_srr1@l(0)
 
-   /* set the stack limit to the current stack
-* and set the limit to protect the thread_info
-* struct
-*/
+   /* set the stack limit to the current stack */
mfspr   r8,SPRN_SPRG_THREAD
lwz r0,KSP_LIMIT(r8)
stw r0,saved_ksp_limit@l(0)
-   rlwimi  r0,r1,0,0,(31-THREAD_SHIFT)
+   rlwinm  r0,r1,0,0,(31 - THREAD_SHIFT)
stw r0,KSP_LIMIT(r8)
/* fall through */
 #endif
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 3fdb6b6973cf..62cfccf4af89 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -618,9 +618,8 @@ static inline void check_stack_overflow(void)
sp = current_stack_pointer() & (THREAD_SIZE-1);
 
/* check for stack overflow: is there less than 2KB free? */
-   if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
-   pr_err("do_IRQ: stack overflow: %ld\n",
-   sp - sizeof(struct thread_info));
+  

[PATCH v11 4/9] powerpc: Activate CONFIG_THREAD_INFO_IN_TASK

2018-12-09 Thread Christophe Leroy
This patch activates CONFIG_THREAD_INFO_IN_TASK which
moves the thread_info into task_struct.

Moving thread_info into task_struct has the following advantages:
- It protects thread_info from corruption in the case of stack
overflows.
- Its address is harder to determine if stack addresses are
leaked, making a number of attacks more difficult.

This has the following consequences:
- thread_info is now located at the beginning of task_struct.
- The 'cpu' field is now in task_struct, and only exists when
CONFIG_SMP is active.
- thread_info no longer has the 'task' field.

This patch:
- Removes all recopy of thread_info struct when the stack changes.
- Changes the CURRENT_THREAD_INFO() macro to point to current.
- Selects CONFIG_THREAD_INFO_IN_TASK.
- Modifies raw_smp_processor_id() to get ->cpu from current without
including linux/sched.h to avoid circular inclusion and without
including asm/asm-offsets.h to avoid symbol names duplication
between ASM constants and C constants.

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/Kconfig   |  1 +
 arch/powerpc/Makefile  |  7 +
 arch/powerpc/include/asm/ptrace.h  |  2 +-
 arch/powerpc/include/asm/smp.h | 17 +++-
 arch/powerpc/include/asm/thread_info.h | 17 ++--
 arch/powerpc/kernel/asm-offsets.c  |  7 +++--
 arch/powerpc/kernel/entry_32.S |  9 +++
 arch/powerpc/kernel/exceptions-64e.S   | 11 
 arch/powerpc/kernel/head_32.S  |  6 ++---
 arch/powerpc/kernel/head_44x.S |  4 +--
 arch/powerpc/kernel/head_64.S  |  1 +
 arch/powerpc/kernel/head_booke.h   |  8 +-
 arch/powerpc/kernel/head_fsl_booke.S   |  7 +++--
 arch/powerpc/kernel/irq.c  | 47 +-
 arch/powerpc/kernel/kgdb.c | 28 
 arch/powerpc/kernel/machine_kexec_64.c |  6 ++---
 arch/powerpc/kernel/setup_64.c | 21 ---
 arch/powerpc/kernel/smp.c  |  2 +-
 arch/powerpc/net/bpf_jit32.h   |  5 ++--
 19 files changed, 52 insertions(+), 154 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8ea7c2c02cbf..e312e92e3381 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -238,6 +238,7 @@ config PPC
select RTC_LIB
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+   select THREAD_INFO_IN_TASK
select VIRT_TO_BUS  if !PPC64
#
# Please keep this list sorted alphabetically.
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 40bbeeeb5b4a..1135cb0b7e48 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -421,6 +421,13 @@ else
 endif
 endif
 
+ifdef CONFIG_SMP
+prepare: task_cpu_prepare
+
+task_cpu_prepare: prepare0
+   $(eval KBUILD_CFLAGS += -D_TASK_CPU=$(shell awk '{if ($$2 == "TI_CPU") 
print $$3;}' include/generated/asm-offsets.h))
+endif
+
 # Check toolchain versions:
 # - gcc-4.6 is the minimum kernel-wide version so nothing required.
 checkbin:
diff --git a/arch/powerpc/include/asm/ptrace.h 
b/arch/powerpc/include/asm/ptrace.h
index 0b8a735b6d85..64271e562fed 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -157,7 +157,7 @@ extern int ptrace_put_reg(struct task_struct *task, int 
regno,
  unsigned long data);
 
 #define current_pt_regs() \
-   ((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) 
- 1)
+   ((struct pt_regs *)((unsigned long)task_stack_page(current) + 
THREAD_SIZE) - 1)
 /*
  * We use the least-significant bit of the trap field to indicate
  * whether we have saved the full set of registers, or only a
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 41695745032c..0de717e16dd6 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -83,7 +83,22 @@ int is_cpu_dead(unsigned int cpu);
 /* 32-bit */
 extern int smp_hw_index[];
 
-#define raw_smp_processor_id() (current_thread_info()->cpu)
+/*
+ * This is particularly ugly: it appears we can't actually get the definition
+ * of task_struct here, but we need access to the CPU this task is running on.
+ * Instead of using task_struct we're using _TASK_CPU which is extracted from
+ * asm-offsets.h by kbuild to get the current processor ID.
+ *
+ * This also needs to be safeguarded when building asm-offsets.s because at
+ * that time _TASK_CPU is not defined yet. It could have been guarded by
+ * _TASK_CPU itself, but we want the build to fail if _TASK_CPU is missing
+ * when building something else than asm-offsets.s
+ */
+#ifdef GENERATING_ASM_OFFSETS
+#define raw_smp_processor_id() (0)
+#else
+#define raw_smp_processor_id() (*(unsigned int *)((void *)current + 
_TASK_CPU))
+#endif
 #define hard_smp_processor_id()(smp_hw_index[smp_processor_id()])
 
 static inline int 

[PATCH v11 3/9] powerpc: Prepare for moving thread_info into task_struct

2018-12-09 Thread Christophe Leroy
This patch cleans the powerpc kernel before activating
CONFIG_THREAD_INFO_IN_TASK:
- The purpose of the pointer given to call_do_softirq() and
call_do_irq() is to point to the new stack ==> change it to void* and
rename it 'sp'.
- Don't use CURRENT_THREAD_INFO() to locate the stack.
- Fix a few comments.
- Replace current_thread_info()->task by current
- Remove unnecessary casts to thread_info, as they'll become invalid
once thread_info is not in stack anymore.
- Rename THREAD_INFO to TASK_STACK: as it is in fact the offset of the
pointer to the stack in task_struct, this pointer will not be impacted
by the move of THREAD_INFO.
- Makes TASK_STACK available to PPC64. PPC64 will need it to get the
stack pointer from current once the thread_info has been moved.
- Modifies klp_init_thread_info() to take task_struct pointer argument.

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/irq.h   |  4 ++--
 arch/powerpc/include/asm/livepatch.h |  7 ---
 arch/powerpc/include/asm/processor.h |  4 ++--
 arch/powerpc/include/asm/reg.h   |  2 +-
 arch/powerpc/kernel/asm-offsets.c|  2 +-
 arch/powerpc/kernel/entry_32.S   |  2 +-
 arch/powerpc/kernel/entry_64.S   |  2 +-
 arch/powerpc/kernel/head_32.S|  4 ++--
 arch/powerpc/kernel/head_40x.S   |  4 ++--
 arch/powerpc/kernel/head_44x.S   |  2 +-
 arch/powerpc/kernel/head_8xx.S   |  2 +-
 arch/powerpc/kernel/head_booke.h |  4 ++--
 arch/powerpc/kernel/head_fsl_booke.S |  4 ++--
 arch/powerpc/kernel/irq.c|  2 +-
 arch/powerpc/kernel/misc_32.S|  4 ++--
 arch/powerpc/kernel/process.c|  8 
 arch/powerpc/kernel/setup-common.c   |  2 +-
 arch/powerpc/kernel/setup_32.c   | 15 +--
 arch/powerpc/kernel/smp.c|  4 +++-
 19 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index ee39ce56b2a2..2efbae8d93be 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -63,8 +63,8 @@ extern struct thread_info *hardirq_ctx[NR_CPUS];
 extern struct thread_info *softirq_ctx[NR_CPUS];
 
 extern void irq_ctx_init(void);
-extern void call_do_softirq(struct thread_info *tp);
-extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
+void call_do_softirq(void *sp);
+void call_do_irq(struct pt_regs *regs, void *sp);
 extern void do_IRQ(struct pt_regs *regs);
 extern void __init init_IRQ(void);
 extern void __do_irq(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/livepatch.h 
b/arch/powerpc/include/asm/livepatch.h
index 47a03b9b528b..8a81d10ccc82 100644
--- a/arch/powerpc/include/asm/livepatch.h
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -43,13 +43,14 @@ static inline unsigned long 
klp_get_ftrace_location(unsigned long faddr)
return ftrace_location_range(faddr, faddr + 16);
 }
 
-static inline void klp_init_thread_info(struct thread_info *ti)
+static inline void klp_init_thread_info(struct task_struct *p)
 {
+   struct thread_info *ti = task_thread_info(p);
/* + 1 to account for STACK_END_MAGIC */
-   ti->livepatch_sp = (unsigned long *)(ti + 1) + 1;
+   ti->livepatch_sp = end_of_stack(p) + 1;
 }
 #else
-static void klp_init_thread_info(struct thread_info *ti) { }
+static inline void klp_init_thread_info(struct task_struct *p) { }
 #endif /* CONFIG_LIVEPATCH */
 
 #endif /* _ASM_POWERPC_LIVEPATCH_H */
diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 692f7383d461..15acb282a876 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -40,7 +40,7 @@
 
 #ifndef __ASSEMBLY__
 #include 
-#include 
+#include 
 #include 
 #include 
 
@@ -326,7 +326,7 @@ struct thread_struct {
 
 #define INIT_SP(sizeof(init_stack) + (unsigned long) 
_stack)
 #define INIT_SP_LIMIT \
-   (_ALIGN_UP(sizeof(init_thread_info), 16) + (unsigned long) _stack)
+   (_ALIGN_UP(sizeof(struct thread_info), 16) + (unsigned long)_stack)
 
 #ifdef CONFIG_SPE
 #define SPEFSCR_INIT \
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 0d2139a0d5b9..f75eee7b0789 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1060,7 +1060,7 @@
  * - SPRG9 debug exception scratch
  *
  * All 32-bit:
- * - SPRG3 current thread_info pointer
+ * - SPRG3 current thread_struct physical addr pointer
  *(virtual on BookE, physical on others)
  *
  * 32-bit classic:
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 9ffc72ded73a..b2b52e002a76 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -90,10 +90,10 @@ int main(void)
DEFINE(SIGSEGV, SIGSEGV);
DEFINE(NMI_MASK, NMI_MASK);
 #else
-   OFFSET(THREAD_INFO, task_struct, stack);
DEFINE(THREAD_INFO_GAP, 

[PATCH v11 2/9] powerpc: Only use task_struct 'cpu' field on SMP

2018-12-09 Thread Christophe Leroy
When moving to CONFIG_THREAD_INFO_IN_TASK, the thread_info 'cpu' field
gets moved into task_struct and only defined when CONFIG_SMP is set.

This patch ensures that TI_CPU is only used when CONFIG_SMP is set and
that the task_struct 'cpu' field is not used directly outside of SMP code.

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/kernel/head_fsl_booke.S | 2 ++
 arch/powerpc/kernel/misc_32.S| 4 
 arch/powerpc/xmon/xmon.c | 2 +-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/head_fsl_booke.S 
b/arch/powerpc/kernel/head_fsl_booke.S
index e2750b856c8f..05b574f416b3 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -243,8 +243,10 @@ set_ivor:
li  r0,0
stwur0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
 
+#ifdef CONFIG_SMP
CURRENT_THREAD_INFO(r22, r1)
stw r24, TI_CPU(r22)
+#endif
 
bl  early_init
 
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 57d2ffb2d45c..02b8cdd73792 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -183,10 +183,14 @@ _GLOBAL(low_choose_750fx_pll)
or  r4,r4,r5
mtspr   SPRN_HID1,r4
 
+#ifdef CONFIG_SMP
/* Store new HID1 image */
CURRENT_THREAD_INFO(r6, r1)
lwz r6,TI_CPU(r6)
slwir6,r6,2
+#else
+   li  r6, 0
+#endif
addis   r6,r6,nap_save_hid1@ha
stw r4,nap_save_hid1@l(r6)
 
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 372f4d80bcd6..5e5e44dcbf89 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2997,7 +2997,7 @@ static void show_task(struct task_struct *tsk)
printf("%px %016lx %6d %6d %c %2d %s\n", tsk,
tsk->thread.ksp,
tsk->pid, rcu_dereference(tsk->parent)->pid,
-   state, task_thread_info(tsk)->cpu,
+   state, task_cpu(tsk),
tsk->comm);
 }
 
-- 
2.13.3



[PATCH v11 1/9] book3s/64: avoid circular header inclusion in mmu-hash.h

2018-12-09 Thread Christophe Leroy
When activating CONFIG_THREAD_INFO_IN_TASK, linux/sched.h
includes asm/current.h. This generates a circular dependency.
To avoid that, asm/processor.h shall not be included in mmu-hash.h

In order to do that, this patch moves into a new header called
asm/task_size_user64.h the information from asm/processor.h required
by mmu-hash.h

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  2 +-
 arch/powerpc/include/asm/processor.h  | 34 +-
 arch/powerpc/include/asm/task_size_user64.h   | 42 +++
 arch/powerpc/kvm/book3s_hv_hmi.c  |  1 +
 4 files changed, 45 insertions(+), 34 deletions(-)
 create mode 100644 arch/powerpc/include/asm/task_size_user64.h

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 12e522807f9f..b2aba048301e 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -23,7 +23,7 @@
  */
 #include 
 #include 
-#include 
+#include 
 #include 
 
 /*
diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index ee58526cb6c2..692f7383d461 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -95,40 +95,8 @@ void release_thread(struct task_struct *);
 #endif
 
 #ifdef CONFIG_PPC64
-/*
- * 64-bit user address space can have multiple limits
- * For now supported values are:
- */
-#define TASK_SIZE_64TB  (0x4000UL)
-#define TASK_SIZE_128TB (0x8000UL)
-#define TASK_SIZE_512TB (0x0002UL)
-#define TASK_SIZE_1PB   (0x0004UL)
-#define TASK_SIZE_2PB   (0x0008UL)
-/*
- * With 52 bits in the address we can support
- * upto 4PB of range.
- */
-#define TASK_SIZE_4PB   (0x0010UL)
 
-/*
- * For now 512TB is only supported with book3s and 64K linux page size.
- */
-#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
-/*
- * Max value currently used:
- */
-#define TASK_SIZE_USER64   TASK_SIZE_4PB
-#define DEFAULT_MAP_WINDOW_USER64  TASK_SIZE_128TB
-#define TASK_CONTEXT_SIZE  TASK_SIZE_512TB
-#else
-#define TASK_SIZE_USER64   TASK_SIZE_64TB
-#define DEFAULT_MAP_WINDOW_USER64  TASK_SIZE_64TB
-/*
- * We don't need to allocate extended context ids for 4K page size, because
- * we limit the max effective address on this config to 64TB.
- */
-#define TASK_CONTEXT_SIZE  TASK_SIZE_64TB
-#endif
+#include 
 
 /*
  * 32-bit user address space is 4GB - 1 page
diff --git a/arch/powerpc/include/asm/task_size_user64.h 
b/arch/powerpc/include/asm/task_size_user64.h
new file mode 100644
index ..a4043075864b
--- /dev/null
+++ b/arch/powerpc/include/asm/task_size_user64.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TASK_SIZE_USER64_H
+#define _ASM_POWERPC_TASK_SIZE_USER64_H
+
+#ifdef CONFIG_PPC64
+/*
+ * 64-bit user address space can have multiple limits
+ * For now supported values are:
+ */
+#define TASK_SIZE_64TB  (0x4000UL)
+#define TASK_SIZE_128TB (0x8000UL)
+#define TASK_SIZE_512TB (0x0002UL)
+#define TASK_SIZE_1PB   (0x0004UL)
+#define TASK_SIZE_2PB   (0x0008UL)
+/*
+ * With 52 bits in the address we can support
+ * upto 4PB of range.
+ */
+#define TASK_SIZE_4PB   (0x0010UL)
+
+/*
+ * For now 512TB is only supported with book3s and 64K linux page size.
+ */
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
+/*
+ * Max value currently used:
+ */
+#define TASK_SIZE_USER64   TASK_SIZE_4PB
+#define DEFAULT_MAP_WINDOW_USER64  TASK_SIZE_128TB
+#define TASK_CONTEXT_SIZE  TASK_SIZE_512TB
+#else
+#define TASK_SIZE_USER64   TASK_SIZE_64TB
+#define DEFAULT_MAP_WINDOW_USER64  TASK_SIZE_64TB
+/*
+ * We don't need to allocate extended context ids for 4K page size, because
+ * we limit the max effective address on this config to 64TB.
+ */
+#define TASK_CONTEXT_SIZE  TASK_SIZE_64TB
+#endif
+
+#endif /* CONFIG_PPC64 */
+#endif /* _ASM_POWERPC_TASK_SIZE_USER64_H */
diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c
index e3f738eb1cac..64b5011475c7 100644
--- a/arch/powerpc/kvm/book3s_hv_hmi.c
+++ b/arch/powerpc/kvm/book3s_hv_hmi.c
@@ -24,6 +24,7 @@
 #include 
 #include 
 #include 
+#include 
 
 void wait_for_subcore_guest_exit(void)
 {
-- 
2.13.3



[PATCH v10 0/9] powerpc: Switch to CONFIG_THREAD_INFO_IN_TASK

2018-12-09 Thread Christophe Leroy
The purpose of this series is to activate CONFIG_THREAD_INFO_IN_TASK which
moves the thread_info into task_struct.

Moving thread_info into task_struct has the following advantages:
- It protects thread_info from corruption in the case of stack
overflows.
- Its address is harder to determine if stack addresses are
leaked, making a number of attacks more difficult.

Changes since v10:
 - Rebased on 21622a0d2023 ("Automatic merge of branches 'master', 'next' and 
'fixes' into merge")
  ==> Fixed conflict in setup_32.S

Changes since v9:
 - Rebased on 183cbf93be88 ("Automatic merge of branches 'master', 'next' and 
'fixes' into merge")
  ==> Fixed conflict on xmon

Changes since v8:
 - Rebased on e589b79e40d9 ("Automatic merge of branches 'master', 'next' and 
'fixes' into merge")
  ==> Main impact was conflicts due to commit 9a8dd708d547 ("memblock: rename 
memblock_alloc{_nid,_try_nid} to memblock_phys_alloc*")

Changes since v7:
 - Rebased on fb6c6ce7907d ("Automatic merge of branches 'master', 'next' and 
'fixes' into merge")

Changes since v6:
 - Fixed validate_sp() to exclude NULL sp in 'regain entire stack space' patch 
(early crash with CONFIG_KMEMLEAK)

Changes since v5:
 - Fixed livepatch_sp setup by using end_of_stack() instead of hardcoding
 - Fixed PPC_BPF_LOAD_CPU() macro

Changes since v4:
 - Fixed a build failure on 32-bit SMP when include/generated/asm-offsets.h
   does not already exist; it was caused by spaces instead of a tab in the
   Makefile

Changes since RFC v3: (based on Nick's review)
 - Renamed task_size.h to task_size_user64.h to better relate to what it 
contains.
 - Handling of the isolation of thread_info cpu field inside CONFIG_SMP #ifdefs 
moved to a separate patch.
 - Removed CURRENT_THREAD_INFO macro completely.
 - Added a guard in asm/smp.h to avoid build failure before _TASK_CPU is 
defined.
 - Added a patch at the end to rename 'tp' pointers to 'sp' pointers
 - Renamed 'tp' into 'sp' pointers in preparation patch when relevant
 - Fixed a few commit logs
 - Fixed checkpatch report.

Changes since RFC v2:
 - Removed the modification of names in asm-offsets
 - Created a rule in arch/powerpc/Makefile to append the offset of current->cpu 
in CFLAGS
 - Modified asm/smp.h to use the offset set in CFLAGS
 - Squashed the renaming of THREAD_INFO to TASK_STACK in the preparation patch
 - Moved the modification of current_pt_regs in the patch activating 
CONFIG_THREAD_INFO_IN_TASK

Changes since RFC v1:
 - Removed the first patch which was modifying header inclusion order in timer
 - Modified some names in asm-offsets to avoid conflicts when including 
asm-offsets in C files
 - Modified asm/smp.h to avoid having to include linux/sched.h (using 
asm-offsets instead)
 - Moved some changes from the activation patch to the preparation patch.

Christophe Leroy (9):
  book3s/64: avoid circular header inclusion in mmu-hash.h
  powerpc: Only use task_struct 'cpu' field on SMP
  powerpc: Prepare for moving thread_info into task_struct
  powerpc: Activate CONFIG_THREAD_INFO_IN_TASK
  powerpc: regain entire stack space
  powerpc: 'current_set' is now a table of task_struct pointers
  powerpc/32: Remove CURRENT_THREAD_INFO and rename TI_CPU
  powerpc/64: Remove CURRENT_THREAD_INFO
  powerpc: clean stack pointers naming

 arch/powerpc/Kconfig   |  1 +
 arch/powerpc/Makefile  |  7 +++
 arch/powerpc/include/asm/asm-prototypes.h  |  4 +-
 arch/powerpc/include/asm/book3s/64/mmu-hash.h  |  2 +-
 arch/powerpc/include/asm/exception-64s.h   |  4 +-
 arch/powerpc/include/asm/irq.h | 14 ++---
 arch/powerpc/include/asm/livepatch.h   |  7 ++-
 arch/powerpc/include/asm/processor.h   | 39 +
 arch/powerpc/include/asm/ptrace.h  |  2 +-
 arch/powerpc/include/asm/reg.h |  2 +-
 arch/powerpc/include/asm/smp.h | 17 +-
 arch/powerpc/include/asm/task_size_user64.h| 42 ++
 arch/powerpc/include/asm/thread_info.h | 19 ---
 arch/powerpc/kernel/asm-offsets.c  | 10 ++--
 arch/powerpc/kernel/entry_32.S | 66 --
 arch/powerpc/kernel/entry_64.S | 12 ++--
 arch/powerpc/kernel/epapr_hcalls.S |  5 +-
 arch/powerpc/kernel/exceptions-64e.S   | 13 +
 arch/powerpc/kernel/exceptions-64s.S   |  2 +-
 arch/powerpc/kernel/head_32.S  | 14 ++---
 arch/powerpc/kernel/head_40x.S |  4 +-
 arch/powerpc/kernel/head_44x.S |  8 +--
 arch/powerpc/kernel/head_64.S  |  1 +
 arch/powerpc/kernel/head_8xx.S |  2 +-
 arch/powerpc/kernel/head_booke.h   | 12 +---
 arch/powerpc/kernel/head_fsl_booke.S   | 16 +++---
 arch/powerpc/kernel/idle_6xx.S |  8 +--
 arch/powerpc/kernel/idle_book3e.S  |  2 +-
 arch/powerpc/kernel/idle_e500.S|  8 +--
 

[PATCH v2 3/3] powerpc/64s: Implement KUAP for Radix MMU

2018-12-09 Thread Russell Currey
Kernel Userspace Access Prevention utilises a feature of
the Radix MMU which disallows read and write access to userspace
addresses.  By utilising this, the kernel is prevented from accessing
user data from outside of trusted paths that perform proper safety
checks, such as copy_{to/from}_user() and friends.

Userspace access is disabled from early boot and is only enabled when:

- exiting the kernel and entering userspace
- performing an operation like copy_{to/from}_user()
- context switching to a process that has access enabled

and similarly, access is disabled again when exiting userspace and
entering the kernel.

This feature has a slight performance impact, which I roughly measured
to be 3% slower in the worst case (performing 1GB of 1 byte
read()/write() syscalls), and is gated behind the CONFIG_PPC_KUAP option
for performance-critical builds.

This feature can be tested by using the lkdtm driver (CONFIG_LKDTM=y)
and performing the following:

echo ACCESS_USERSPACE > [debugfs]/provoke-crash/DIRECT

if enabled, this should send SIGSEGV to the thread.

The KUAP state is tracked in the PACA because reading the register
that manages these accesses is costly. This has the unfortunate
downside of another layer of abstraction for platforms that implement
the locks and unlocks, but this could be useful in future for other
things too, like counters for benchmarking or smartly handling lots
of small accesses at once.

Signed-off-by: Russell Currey 
---
 .../powerpc/include/asm/book3s/64/kup-radix.h | 36 +++
 arch/powerpc/include/asm/exception-64s.h  | 15 ++--
 arch/powerpc/include/asm/kup.h|  3 ++
 arch/powerpc/include/asm/mmu.h|  9 -
 arch/powerpc/include/asm/reg.h|  1 +
 arch/powerpc/mm/pgtable-radix.c   | 14 
 arch/powerpc/mm/pkeys.c   |  7 ++--
 arch/powerpc/platforms/Kconfig.cputype|  1 +
 8 files changed, 81 insertions(+), 5 deletions(-)
 create mode 100644 arch/powerpc/include/asm/book3s/64/kup-radix.h

diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h 
b/arch/powerpc/include/asm/book3s/64/kup-radix.h
new file mode 100644
index ..93273ca99310
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_RADIX_H
+#define _ASM_POWERPC_KUP_RADIX_H
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_PPC_KUAP
+#include 
+/*
+ * We do have the ability to individually lock/unlock reads and writes rather
+ * than both at once, however it's a significant performance hit due to needing
+ * to do a read-modify-write, which adds a mfspr, which is slow.  As a result,
+ * locking/unlocking both at once is preferred.
+ */
+static inline void unlock_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+   if (!mmu_has_feature(MMU_FTR_RADIX_KUAP))
+   return;
+
+   mtspr(SPRN_AMR, 0);
+   isync();
+   get_paca()->user_access_allowed = 1;
+}
+
+static inline void lock_user_access(void __user *to, const void __user *from,
+   unsigned long size)
+{
+   if (!mmu_has_feature(MMU_FTR_RADIX_KUAP))
+   return;
+
+   mtspr(SPRN_AMR, AMR_LOCKED);
+   get_paca()->user_access_allowed = 0;
+}
+#endif /* CONFIG_PPC_KUAP */
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 4d971ca1e69b..0ed4923c1282 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -264,8 +264,19 @@ BEGIN_FTR_SECTION_NESTED(943)  
\
std ra,offset(r13); \
 END_FTR_SECTION_NESTED(ftr,ftr,943)
 
-#define LOCK_USER_ACCESS(reg)
-#define UNLOCK_USER_ACCESS(reg)
+#define LOCK_USER_ACCESS(reg)  \
+BEGIN_MMU_FTR_SECTION_NESTED(944)  \
+   lis reg,(AMR_LOCKED)@highest;   \
+   rldicr  reg,reg,32,31;  \
+   mtspr   SPRN_AMR,reg;   \
+END_MMU_FTR_SECTION_NESTED(MMU_FTR_RADIX_KUAP,MMU_FTR_RADIX_KUAP,944)
+
+#define UNLOCK_USER_ACCESS(reg)
\
+BEGIN_MMU_FTR_SECTION_NESTED(945)  \
+   li  reg,0;  \
+   mtspr   SPRN_AMR,reg;   \
+   isync;  \
+END_MMU_FTR_SECTION_NESTED(MMU_FTR_RADIX_KUAP,MMU_FTR_RADIX_KUAP,945)
 
 #define EXCEPTION_PROLOG_0(area)   \
GET_PACA(r13); 

[PATCH v2 0/3] Kernel Userspace Protection for Radix MMU

2018-12-09 Thread Russell Currey
This series is based on Christophe's series:
http://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=78469
with some minor changes.

I wanted to move my patches to apply at the tail of his series to make it
easier for the two of us to work on independent parts, so I'm resending my
part of the series with the intent that it applies at the end.  There are
two required changes to his series to make this work:

In patch 04/11, the #ifdef around the paca_struct flag user_access_allowed
needs to be dropped.

With my patches removed, patch 11/11 needs to not reference
asm/book3s/64/kup-radix.h in asm/book3s/64/kup.h (so below the kup.h chunk
in that patch).

Sorry for being a pain, I'd rather not send a gigantic series full of
patches that aren't mine.

This branch shows how I'd imagine it would be pulled together:
https://github.com/ruscur/linux/commits/kuap2

Since the last version of this series:

  - fixed issues booting on hash, and the series now fully bisects
  - dropped some parts which are now part of Christophe's series
  - Fix __patch_instruction() in early boot
  - save three instructions in LOCK_USER_ACCESS()

Russell Currey (3):
  powerpc/mm/radix: Use KUEP API for Radix MMU
  powerpc/lib: Refactor __patch_instruction() to use __put_user_asm()
  powerpc/64s: Implement KUAP for Radix MMU

 .../powerpc/include/asm/book3s/64/kup-radix.h | 36 +++
 arch/powerpc/include/asm/exception-64s.h  | 15 ++--
 arch/powerpc/include/asm/kup.h|  3 ++
 arch/powerpc/include/asm/mmu.h|  9 -
 arch/powerpc/include/asm/reg.h|  1 +
 arch/powerpc/lib/code-patching.c  |  4 +--
 arch/powerpc/mm/pgtable-radix.c   | 25 +++--
 arch/powerpc/mm/pkeys.c   |  7 ++--
 arch/powerpc/platforms/Kconfig.cputype|  2 ++
 9 files changed, 92 insertions(+), 10 deletions(-)
 create mode 100644 arch/powerpc/include/asm/book3s/64/kup-radix.h

-- 
2.19.2



[PATCH v2 1/3] powerpc/mm/radix: Use KUEP API for Radix MMU

2018-12-09 Thread Russell Currey
Execution protection already exists on radix; this just refactors
the radix init to provide the KUEP setup function instead.

Thus, the only functional change is that it can now be disabled.

Signed-off-by: Russell Currey 
---
 arch/powerpc/mm/pgtable-radix.c| 11 ---
 arch/powerpc/platforms/Kconfig.cputype |  1 +
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 931156069a81..3565e266994b 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -535,8 +535,14 @@ static void radix_init_amor(void)
mtspr(SPRN_AMOR, (3ul << 62));
 }
 
-static void radix_init_iamr(void)
+#ifdef CONFIG_PPC_KUEP
+void __init setup_kuep(bool disabled)
 {
+   if (disabled || !early_radix_enabled())
+   return;
+
+   pr_warn("Activating Kernel Userspace Execution Prevention\n");
+
/*
 * Radix always uses key0 of the IAMR to determine if an access is
 * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
@@ -544,6 +550,7 @@ static void radix_init_iamr(void)
 */
mtspr(SPRN_IAMR, (1ul << 62));
 }
+#endif
 
 void __init radix__early_init_mmu(void)
 {
@@ -605,7 +612,6 @@ void __init radix__early_init_mmu(void)
 
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 
-   radix_init_iamr();
radix_init_pgtable();
/* Switch to the guard PID before turning on MMU */
radix__switch_mmu_context(NULL, _mm);
@@ -627,7 +633,6 @@ void radix__early_init_mmu_secondary(void)
  __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
radix_init_amor();
}
-   radix_init_iamr();
 
radix__switch_mmu_context(NULL, _mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
diff --git a/arch/powerpc/platforms/Kconfig.cputype 
b/arch/powerpc/platforms/Kconfig.cputype
index 9997b5ea5693..48cc8df0fdd2 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -335,6 +335,7 @@ config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
+   select PPC_HAVE_KUEP
default y
help
  Enable support for the Power ISA 3.0 Radix style MMU. Currently this
-- 
2.19.2



[PATCH v2 2/3] powerpc/lib: Refactor __patch_instruction() to use __put_user_asm()

2018-12-09 Thread Russell Currey
__patch_instruction() is called in early boot, and uses
__put_user_size(), which includes the locks and unlocks for KUAP,
which could either be called too early, or in the Radix case, forced to
use "early_" versions of functions just to safely handle this one case.

__put_user_asm() does not do this, and thus is safe to use both in early
boot, and later on since in this case it should only ever be touching
kernel memory.

__patch_instruction() was previously refactored to use __put_user_size()
in order to be able to return -EFAULT, which would allow the kernel to
patch instructions in userspace, which should never happen.  This has
the functional change of causing faults on userspace addresses if KUAP
is turned on, which should never happen in practice.

A future enhancement could be to double check the patch address is
definitely allowed to be tampered with by the kernel.

Signed-off-by: Russell Currey 
---
 arch/powerpc/lib/code-patching.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 89502cbccb1b..15e8c6339960 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -26,9 +26,9 @@
 static int __patch_instruction(unsigned int *exec_addr, unsigned int instr,
   unsigned int *patch_addr)
 {
-   int err;
+   int err = 0;
 
-   __put_user_size(instr, patch_addr, 4, err);
+   __put_user_asm(instr, patch_addr, err, "stw");
if (err)
return err;
 
-- 
2.19.2



[PATCH v3] powerpc/uaccess: fix warning/error with access_ok()

2018-12-09 Thread Christophe Leroy
With the following piece of code, the following compilation warning
is encountered:

if (_IOC_DIR(ioc) != _IOC_NONE) {
int verify = _IOC_DIR(ioc) & _IOC_READ ? VERIFY_WRITE : 
VERIFY_READ;

if (!access_ok(verify, ioarg, _IOC_SIZE(ioc))) {

drivers/platform/test/dev.c: In function 'my_ioctl':
drivers/platform/test/dev.c:219:7: warning: unused variable 'verify' 
[-Wunused-variable]
   int verify = _IOC_DIR(ioc) & _IOC_READ ? VERIFY_WRITE : VERIFY_READ;

This patch fixes it by referencing 'type' in the macro, although
nothing is done with it.

Signed-off-by: Christophe Leroy 
---
 v3: not changing __access_ok() anymore, only referencing type in access_ok()

 v2: fixed the three direct users of __access_ok()

 arch/powerpc/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/uaccess.h 
b/arch/powerpc/include/asm/uaccess.h
index 15bea9a0f260..ebc0b916dcf9 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -63,7 +63,7 @@ static inline int __access_ok(unsigned long addr, unsigned 
long size,
 #endif
 
 #define access_ok(type, addr, size)\
-   (__chk_user_ptr(addr),  \
+   (__chk_user_ptr(addr), (void)(type),\
 __access_ok((__force unsigned long)(addr), (size), get_fs()))
 
 /*
-- 
2.13.3



Re: [PATCH v2] misc: cxl: Use device_type helpers to access the node type

2018-12-09 Thread Michael Ellerman
Rob Herring  writes:

> Remove directly accessing device_type property and use the
> of_node_is_type accessor instead. While not using it here, this is
> part of eventually removing the struct device_node.type pointer.
>
> Cc: Frederic Barrat 
> Cc: Arnd Bergmann 
> Cc: Greg Kroah-Hartman 
> Cc: linuxppc-dev@lists.ozlabs.org
> Acked-by: Andrew Donnellan 
> Signed-off-by: Rob Herring 
> ---
> v2:
> - Reword commit message as this change was using the .type ptr.

I already have v1 in my next.

cheers

> diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
> index b66d832d3233..c79ba1c699ad 100644
> --- a/drivers/misc/cxl/pci.c
> +++ b/drivers/misc/cxl/pci.c
> @@ -1718,7 +1718,6 @@ int cxl_slot_is_switched(struct pci_dev *dev)
>  {
>   struct device_node *np;
>   int depth = 0;
> - const __be32 *prop;
>  
>   if (!(np = pci_device_to_OF_node(dev))) {
>   pr_err("cxl: np = NULL\n");
> @@ -1727,8 +1726,7 @@ int cxl_slot_is_switched(struct pci_dev *dev)
>   of_node_get(np);
>   while (np) {
>   np = of_get_next_parent(np);
> - prop = of_get_property(np, "device_type", NULL);
> - if (!prop || strcmp((char *)prop, "pciex"))
> + if (!of_node_is_type(np, "pciex"))
>   break;
>   depth++;
>   }
> -- 
> 2.19.1


Re: [PATCH kernel v4 04/19] powerpc/powernv: Move npu struct from pnv_phb to pci_controller

2018-12-09 Thread David Gibson
On Mon, Dec 10, 2018 at 01:50:35PM +1100, Alexey Kardashevskiy wrote:
> 
> 
> On 06/12/2018 09:40, David Gibson wrote:
> > On Wed, Dec 05, 2018 at 05:17:57PM +1100, Alexey Kardashevskiy wrote:
> >>
> >>
> >> On 05/12/2018 16:47, Alexey Kardashevskiy wrote:
> >>>
> >>>
> >>> On 05/12/2018 16:14, David Gibson wrote:
>  On Fri, Nov 23, 2018 at 04:52:49PM +1100, Alexey Kardashevskiy wrote:
> > The powernv PCI code stores NPU data in the pnv_phb struct. The latter
> > is referenced by pci_controller::private_data. We are going to have NPU2
> > support in the pseries platform as well but it does not store any
> > > private_data in the pci_controller struct; and even if it did,
> > it would be a different data structure.
> >
> > This makes npu a pointer and stores it one level higher in
> > the pci_controller struct.
> >
> > Signed-off-by: Alexey Kardashevskiy 
> > ---
> > Changes:
> > v4:
> > * changed subj from "powerpc/powernv: Detach npu struct from pnv_phb"
> > * got rid of global list of npus - store them now in pci_controller
> > * got rid of npdev_to_npu() helper
> > ---
> >  arch/powerpc/include/asm/pci-bridge.h|  1 +
> >  arch/powerpc/platforms/powernv/pci.h | 16 -
> >  arch/powerpc/platforms/powernv/npu-dma.c | 81 ++--
> >  3 files changed, 64 insertions(+), 34 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/pci-bridge.h 
> > b/arch/powerpc/include/asm/pci-bridge.h
> > index 94d4490..aee4fcc 100644
> > --- a/arch/powerpc/include/asm/pci-bridge.h
> > +++ b/arch/powerpc/include/asm/pci-bridge.h
> > @@ -129,6 +129,7 @@ struct pci_controller {
> >  #endif /* CONFIG_PPC64 */
> >  
> > void *private_data;
> > +   struct npu *npu;
> >  };
> >  
> >  /* These are used for config access before all the PCI probing
> > diff --git a/arch/powerpc/platforms/powernv/pci.h 
> > b/arch/powerpc/platforms/powernv/pci.h
> > index 2131373..f2d50974 100644
> > --- a/arch/powerpc/platforms/powernv/pci.h
> > +++ b/arch/powerpc/platforms/powernv/pci.h
> > @@ -8,9 +8,6 @@
> >  
> >  struct pci_dn;
> >  
> > -/* Maximum possible number of ATSD MMIO registers per NPU */
> > -#define NV_NMMU_ATSD_REGS 8
> > -
> >  enum pnv_phb_type {
> > PNV_PHB_IODA1   = 0,
> > PNV_PHB_IODA2   = 1,
> > @@ -176,19 +173,6 @@ struct pnv_phb {
> > unsigned intdiag_data_size;
> > u8  *diag_data;
> >  
> > -   /* Nvlink2 data */
> > -   struct npu {
> > -   int index;
> > -   __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
> > -   unsigned int mmio_atsd_count;
> > -
> > -   /* Bitmask for MMIO register usage */
> > -   unsigned long mmio_atsd_usage;
> > -
> > -   /* Do we need to explicitly flush the nest mmu? */
> > -   bool nmmu_flush;
> > -   } npu;
> > -
> > int p2p_target_count;
> >  };
> >  
> > diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
> > b/arch/powerpc/platforms/powernv/npu-dma.c
> > index 91d488f..7dd5c0e5 100644
> > --- a/arch/powerpc/platforms/powernv/npu-dma.c
> > +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> > @@ -327,6 +327,25 @@ struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct 
> > pnv_ioda_pe *npe)
> > return gpe;
> >  }
> >  
> > +/*
> > + * NPU2 ATS
> > + */
> > +/* Maximum possible number of ATSD MMIO registers per NPU */
> > +#define NV_NMMU_ATSD_REGS 8
> > +
> > +/* An NPU descriptor, valid for POWER9 only */
> > +struct npu {
> > +   int index;
> > +   __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
> > +   unsigned int mmio_atsd_count;
> > +
> > +   /* Bitmask for MMIO register usage */
> > +   unsigned long mmio_atsd_usage;
> > +
> > +   /* Do we need to explicitly flush the nest mmu? */
> > +   bool nmmu_flush;
> > +};
> > +
> >  /* Maximum number of nvlinks per npu */
> >  #define NV_MAX_LINKS 6
> >  
> > @@ -478,7 +497,6 @@ static void acquire_atsd_reg(struct npu_context 
> > *npu_context,
> > int i, j;
> > struct npu *npu;
> > struct pci_dev *npdev;
> > -   struct pnv_phb *nphb;
> >  
> > for (i = 0; i <= max_npu2_index; i++) {
> > mmio_atsd_reg[i].reg = -1;
> > @@ -493,8 +511,10 @@ static void acquire_atsd_reg(struct npu_context 
> > *npu_context,
> > if (!npdev)
> > continue;
> >  
> > -   nphb = 
> > pci_bus_to_host(npdev->bus)->private_data;
> 

[PATCH v5 22/25] syscall_get_arch: add "struct task_struct *" argument

2018-12-09 Thread Dmitry V. Levin
This argument is required to extend the generic ptrace API with
PTRACE_GET_SYSCALL_INFO request: syscall_get_arch() is going
to be called from ptrace_request() along with syscall_get_nr(),
syscall_get_arguments(), syscall_get_error(), and
syscall_get_return_value() functions with a tracee as their argument.

Reverts: 5e937a9ae913 ("syscall_get_arch: remove useless function arguments")
Reverts: 1002d94d3076 ("syscall.h: fix doc text for syscall_get_arch()")
Reviewed-by: Andy Lutomirski  # for x86
Reviewed-by: Palmer Dabbelt 
Acked-by: Paul Burton  # MIPS parts
Acked-by: Michael Ellerman  (powerpc)
Cc: Eric Paris 
Cc: Paul Moore 
Cc: Richard Henderson 
Cc: Ivan Kokshaysky 
Cc: Matt Turner 
Cc: Vineet Gupta 
Cc: Russell King 
Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Mark Salter 
Cc: Aurelien Jacquiot 
Cc: Yoshinori Sato 
Cc: Richard Kuo 
Cc: Tony Luck 
Cc: Fenghua Yu 
Cc: Geert Uytterhoeven 
Cc: Michal Simek 
Cc: Greentime Hu 
Cc: Vincent Chen 
Cc: Ley Foon Tan 
Cc: Jonas Bonn 
Cc: Stefan Kristiansson 
Cc: Stafford Horne 
Cc: James E.J. Bottomley 
Cc: Helge Deller 
Cc: Albert Ou 
Cc: Martin Schwidefsky 
Cc: Heiko Carstens 
Cc: Rich Felker 
Cc: David S. Miller 
Cc: Guan Xuetao 
Cc: Jeff Dike 
Cc: Richard Weinberger 
Cc: Chris Zankel 
Cc: Max Filippov 
Cc: Arnd Bergmann 
Cc: Kees Cook 
Cc: Will Drewry 
Cc: Oleg Nesterov 
Cc: Elvira Khabirova 
Cc: Eugene Syromyatnikov 
Cc: Ralf Baechle 
Cc: James Hogan 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: Borislav Petkov 
Cc: H. Peter Anvin 
Cc: x...@kernel.org
Cc: linux-al...@vger.kernel.org
Cc: linux-snps-...@lists.infradead.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-c6x-...@linux-c6x.org
Cc: uclinux-h8-de...@lists.sourceforge.jp
Cc: linux-hexa...@vger.kernel.org
Cc: linux-i...@vger.kernel.org
Cc: linux-m...@lists.linux-m68k.org
Cc: linux-m...@vger.kernel.org
Cc: nios2-...@lists.rocketboards.org
Cc: openr...@lists.librecores.org
Cc: linux-par...@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ri...@lists.infradead.org
Cc: linux-s...@vger.kernel.org
Cc: linux...@vger.kernel.org
Cc: sparcli...@vger.kernel.org
Cc: linux...@lists.infradead.org
Cc: linux-xte...@linux-xtensa.org
Cc: linux-a...@vger.kernel.org
Cc: linux-au...@redhat.com
Signed-off-by: Dmitry V. Levin 
---

Notes:
v5: fixed asm-generic docs by reverting 1002d94d3076, added Cc
v2: cleaned up mips part, added Reviewed-by

 arch/alpha/include/asm/syscall.h  |  2 +-
 arch/arc/include/asm/syscall.h|  2 +-
 arch/arm/include/asm/syscall.h|  2 +-
 arch/arm64/include/asm/syscall.h  |  4 ++--
 arch/c6x/include/asm/syscall.h|  2 +-
 arch/csky/include/asm/syscall.h   |  2 +-
 arch/h8300/include/asm/syscall.h  |  2 +-
 arch/hexagon/include/asm/syscall.h|  2 +-
 arch/ia64/include/asm/syscall.h   |  2 +-
 arch/m68k/include/asm/syscall.h   |  2 +-
 arch/microblaze/include/asm/syscall.h |  2 +-
 arch/mips/include/asm/syscall.h   |  6 +++---
 arch/mips/kernel/ptrace.c |  2 +-
 arch/nds32/include/asm/syscall.h  |  2 +-
 arch/nios2/include/asm/syscall.h  |  2 +-
 arch/openrisc/include/asm/syscall.h   |  2 +-
 arch/parisc/include/asm/syscall.h |  4 ++--
 arch/powerpc/include/asm/syscall.h| 10 --
 arch/riscv/include/asm/syscall.h  |  2 +-
 arch/s390/include/asm/syscall.h   |  4 ++--
 arch/sh/include/asm/syscall_32.h  |  2 +-
 arch/sh/include/asm/syscall_64.h  |  2 +-
 arch/sparc/include/asm/syscall.h  |  5 +++--
 arch/unicore32/include/asm/syscall.h  |  2 +-
 arch/x86/include/asm/syscall.h|  8 +---
 arch/x86/um/asm/syscall.h |  2 +-
 arch/xtensa/include/asm/syscall.h |  2 +-
 include/asm-generic/syscall.h |  5 +++--
 kernel/auditsc.c  |  4 ++--
 kernel/seccomp.c  |  4 ++--
 30 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h
index 437758bdc49f..288779aa9847 100644
--- a/arch/alpha/include/asm/syscall.h
+++ b/arch/alpha/include/asm/syscall.h
@@ -31,7 +31,7 @@ syscall_get_return_value(struct task_struct *task, struct 
pt_regs *regs)
 }
 
 static inline int
-syscall_get_arch(void)
+syscall_get_arch(struct task_struct *task)
 {
return AUDIT_ARCH_ALPHA;
 }
diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h
index c7fc4c0c3bcb..caf2697ef5b7 100644
--- a/arch/arc/include/asm/syscall.h
+++ b/arch/arc/include/asm/syscall.h
@@ -70,7 +70,7 @@ syscall_get_arguments(struct task_struct *task, struct 
pt_regs *regs,
 }
 
 static inline int
-syscall_get_arch(void)
+syscall_get_arch(struct task_struct *task)
 {
return IS_ENABLED(CONFIG_ISA_ARCOMPACT)
? (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index 06dea6bce293..3940ceac0bdc 100644
--- a/arch/arm/include/asm/syscall.h
+++ 

[PATCH v5 00/25] ptrace: add PTRACE_GET_SYSCALL_INFO request

2018-12-09 Thread Dmitry V. Levin
PTRACE_GET_SYSCALL_INFO is a generic ptrace API that lets ptracer obtain
details of the syscall the tracee is blocked in.

There are two reasons for a special syscall-related ptrace request.

Firstly, with the current ptrace API there are cases when ptracer cannot
retrieve necessary information about syscalls.  Some examples include:
* The notorious int-0x80-from-64-bit-task issue.  See [1] for details.
In short, if a 64-bit task performs a syscall through int 0x80, its tracer
has no reliable means to find out that the syscall was, in fact,
a compat syscall, and misidentifies it.
* Syscall-enter-stop and syscall-exit-stop look the same for the tracer.
Common practice is to keep track of the sequence of ptrace-stops in order
not to mix the two syscall-stops up.  But it is not as simple as it looks;
for example, strace had a (just recently fixed) long-standing bug where
attaching strace to a tracee that is performing the execve system call
led to the tracer identifying the following syscall-exit-stop as
syscall-enter-stop, which messed up all the state tracking.
* Since the introduction of commit 84d77d3f06e7e8dea057d10e8ec77ad71f721be3
("ptrace: Don't allow accessing an undumpable mm"), both PTRACE_PEEKDATA
and process_vm_readv become unavailable when the process dumpable flag
is cleared.  On such architectures as ia64 this results in all syscall
arguments being unavailable for the tracer.

Secondly, ptracers also have to support a lot of arch-specific code for
obtaining information about the tracee.  For some architectures, this
requires a ptrace(PTRACE_PEEKUSER, ...) invocation for every syscall
argument and return value.

PTRACE_GET_SYSCALL_INFO returns the following structure:

struct ptrace_syscall_info {
__u8 op;/* PTRACE_SYSCALL_INFO_* */
__u8 __pad0[3];
__u32 arch;
__u64 instruction_pointer;
__u64 stack_pointer;
__u64 frame_pointer;
union {
struct {
__u64 nr;
__u64 args[6];
} entry;
struct {
__s64 rval;
__u8 is_error;
__u8 __pad1[7];
} exit;
struct {
__u64 nr;
__u64 args[6];
__u32 ret_data;
__u8 __pad2[4];
} seccomp;
};
};

The structure was chosen according to [2], except for the following
changes:
* seccomp substructure was added as a superset of entry substructure;
* the type of nr field was changed from int to __u64 because syscall
numbers are, as a practical matter, 64 bits;
* stack_pointer and frame_pointer fields were added along with
instruction_pointer field since they are readily available and can save
the tracer from extra PTRACE_GETREGS/PTRACE_GETREGSET calls;
* arch is always initialized to aid with tracing system calls such as
execve();
* instruction_pointer, stack_pointer, and frame_pointer are always
initialized so they could be easily obtained for non-syscall stops;
* a boolean is_error field was added along with the rval field, so that
the tracer can more reliably distinguish a return value
from an error value.

strace has been ported to PTRACE_GET_SYSCALL_INFO, you can find it
at [3] and [4].

[1] 
https://lore.kernel.org/lkml/ca+55afzcsvmddj9lh_gdbz1ozhyem6zrgpbdajnywm2lf_e...@mail.gmail.com/
[2] 
https://lore.kernel.org/lkml/caobl_7gm0n80n7j_dfw_eqyflyzq+sf4y2avsccv88tb3aw...@mail.gmail.com/
[3] https://github.com/strace/strace/commits/ldv/PTRACE_GET_SYSCALL_INFO
[4] https://gitlab.com/strace/strace/commits/ldv/PTRACE_GET_SYSCALL_INFO

Notes:
v5:
* Merge separate series and patches into the single series.
* Change PTRACE_EVENTMSG_SYSCALL_{ENTRY,EXIT} values as requested by Oleg.
* Change struct ptrace_syscall_info: generalize instruction_pointer,
  stack_pointer, and frame_pointer fields by moving them from
  ptrace_syscall_info.{entry,seccomp} substructures to ptrace_syscall_info
  and initializing them for all stops.
* Add PTRACE_SYSCALL_INFO_NONE, assign it to ptrace_syscall_info.op
  when not in a syscall stop, so e.g. "strace -i" could use the same
  PTRACE_SYSCALL_INFO_SECCOMP interface to obtain instruction_pointer
  when the tracee is in a signal stop.
* Patch all remaining architectures to provide all necessary
  syscall_get_* functions.
* Make available for all architectures: do not conditionalize on
  CONFIG_HAVE_ARCH_TRACEHOOK since all syscall_get_* functions
  are implemented on all architectures.
* Add a test for PTRACE_GET_SYSCALL_INFO to selftests/ptrace.

v4:
* Do not introduce task_struct.ptrace_event,
  use child->last_siginfo->si_code instead.
* Implement PTRACE_SYSCALL_INFO_SECCOMP and ptrace_syscall_info.seccomp
  support along with PTRACE_SYSCALL_INFO_{ENTRY,EXIT} and
  

[PATCH v5 23/25] powerpc/ptrace: replace ptrace_report_syscall() with a tracehook call

2018-12-09 Thread Dmitry V. Levin
From: Elvira Khabirova 

Arch code should use tracehook_*() helpers, as documented
in include/linux/tracehook.h;
ptrace_report_syscall() is not expected to be used outside that file.

The patch does not look very nice, but at least it is correct
and opens the way for PTRACE_GET_SYSCALL_INFO API.

Co-authored-by: Dmitry V. Levin 
Fixes: 5521eb4bca2d ("powerpc/ptrace: Add support for PTRACE_SYSEMU")
Cc: Michael Ellerman 
Cc: Oleg Nesterov 
Cc: Eugene Syromyatnikov 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Breno Leitao 
Cc: Andy Lutomirski 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Elvira Khabirova 
Signed-off-by: Dmitry V. Levin 
---

Notes:
v5: reverted to a simple approach, compile- and run-tested
v4: rewritten to call tracehook_report_syscall_entry() once, compile-tested
v3: add a descriptive comment
v2: explicitly ignore tracehook_report_syscall_entry() return code

 arch/powerpc/kernel/ptrace.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index afb819f4ca68..714c3480c52d 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -3266,12 +3266,17 @@ long do_syscall_trace_enter(struct pt_regs *regs)
user_exit();
 
if (test_thread_flag(TIF_SYSCALL_EMU)) {
-   ptrace_report_syscall(regs);
/*
+* A nonzero return code from tracehook_report_syscall_entry()
+* tells us to prevent the syscall execution, but we are not
+* going to execute it anyway.
+*
 * Returning -1 will skip the syscall execution. We want to
 * avoid clobbering any register also, thus, not 'gotoing'
 * skip label.
 */
+   if (tracehook_report_syscall_entry(regs))
+   ;
return -1;
}
 
-- 
ldv


[PATCH v5 16/25] powerpc: define syscall_get_error()

2018-12-09 Thread Dmitry V. Levin
syscall_get_error() is required to be implemented on this
architecture in addition to already implemented syscall_get_nr(),
syscall_get_arguments(), syscall_get_return_value(), and
syscall_get_arch() functions in order to extend the generic
ptrace API with PTRACE_GET_SYSCALL_INFO request.

Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Oleg Nesterov 
Cc: Andy Lutomirski 
Cc: Elvira Khabirova 
Cc: Eugene Syromyatnikov 
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Dmitry V. Levin 
---

Notes:
This change has been tested with
tools/testing/selftests/ptrace/get_syscall_info.c and strace,
so it's correct from PTRACE_GET_SYSCALL_INFO point of view.

This casts doubt on commit v4.3-rc1~86^2~81, which changed
syscall_set_return_value() in a way that doesn't quite match
syscall_get_error(), but syscall_set_return_value() is out
of scope of this series, so I am just airing my concerns.

 arch/powerpc/include/asm/syscall.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/include/asm/syscall.h 
b/arch/powerpc/include/asm/syscall.h
index ab9f3f0a8637..1d03e753391d 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -39,6 +39,16 @@ static inline void syscall_rollback(struct task_struct *task,
regs->gpr[3] = regs->orig_gpr3;
 }
 
+static inline long syscall_get_error(struct task_struct *task,
+struct pt_regs *regs)
+{
+   /*
+* If the system call failed,
+* regs->gpr[3] contains a positive ERRORCODE.
+*/
+   return (regs->ccr & 0x1000UL) ? -regs->gpr[3] : 0;
+}
+
 static inline long syscall_get_return_value(struct task_struct *task,
struct pt_regs *regs)
 {
-- 
ldv


Is it worth to fix the crashkernel reserved memory blocks the hotplug issue?

2018-12-09 Thread Pingfan Liu
Hi,
I found that in the powerpc code it is possible to reserve a memory region
in the movable zone, as crashkernel does. The x86 code, however, checks the
hotpluggable attribute of memory, so manually specifying a region that
lies in a hotpluggable region will fail.
The x86 code:
/* 0 means: find the address automatically */
if (crash_base <= 0) {
/*
* Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
* as old kexec-tools loads bzImage below that, unless
* "crashkernel=size[KMG],high" is specified.
*/
crash_base = memblock_find_in_range(CRASH_ALIGN,
   high ? CRASH_ADDR_HIGH_MAX
: CRASH_ADDR_LOW_MAX,
   crash_size, CRASH_ALIGN);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}

} else {
unsigned long long start;

start = memblock_find_in_range(crash_base,  --> this func will check
the hotpluggable attribute of memory and fail if the
specified region intersects with a hotpluggable one.
  crash_base + crash_size,
  crash_size, 1 << 20);
if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in use.\n");
return;
}
}

Thanks,
Pingfan


Re: [PATCH v3 09/12] powerpc: perf/core: use PERF_PMU_CAP_NO_EXCLUDE for exclude incapable PMUs

2018-12-09 Thread Madhavan Srinivasan



On 06/12/18 10:17 PM, Andrew Murray wrote:

For PowerPC PMUs that do not support context exclusion let's
advertise the PERF_PMU_CAP_NO_EXCLUDE capability. This ensures that
perf will prevent us from handling events where any exclusion flags
are set. Let's also remove the now unnecessary check for exclusion
flags.


Reviewed-by: Madhavan Srinivasan 



Signed-off-by: Andrew Murray 
---
  arch/powerpc/perf/hv-24x7.c | 10 +-
  arch/powerpc/perf/hv-gpci.c | 10 +-
  arch/powerpc/perf/imc-pmu.c | 19 +--
  3 files changed, 3 insertions(+), 36 deletions(-)

diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 72238ee..d2b8e60 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -1306,15 +1306,6 @@ static int h_24x7_event_init(struct perf_event *event)
return -EINVAL;
}

-   /* unsupported modes and filters */
-   if (event->attr.exclude_user   ||
-   event->attr.exclude_kernel ||
-   event->attr.exclude_hv ||
-   event->attr.exclude_idle   ||
-   event->attr.exclude_host   ||
-   event->attr.exclude_guest)
-   return -EINVAL;
-
/* no branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
@@ -1577,6 +1568,7 @@ static struct pmu h_24x7_pmu = {
.start_txn   = h_24x7_event_start_txn,
.commit_txn  = h_24x7_event_commit_txn,
.cancel_txn  = h_24x7_event_cancel_txn,
+   .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
  };

  static int hv_24x7_init(void)
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 43fabb3..735e77b 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -232,15 +232,6 @@ static int h_gpci_event_init(struct perf_event *event)
return -EINVAL;
}

-   /* unsupported modes and filters */
-   if (event->attr.exclude_user   ||
-   event->attr.exclude_kernel ||
-   event->attr.exclude_hv ||
-   event->attr.exclude_idle   ||
-   event->attr.exclude_host   ||
-   event->attr.exclude_guest)
-   return -EINVAL;
-
/* no branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
@@ -285,6 +276,7 @@ static struct pmu h_gpci_pmu = {
.start   = h_gpci_event_start,
.stop= h_gpci_event_stop,
.read= h_gpci_event_update,
+   .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
  };

  static int hv_gpci_init(void)
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 1fafc32b..1dbb0ee 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -473,15 +473,6 @@ static int nest_imc_event_init(struct perf_event *event)
if (event->hw.sample_period)
return -EINVAL;

-   /* unsupported modes and filters */
-   if (event->attr.exclude_user   ||
-   event->attr.exclude_kernel ||
-   event->attr.exclude_hv ||
-   event->attr.exclude_idle   ||
-   event->attr.exclude_host   ||
-   event->attr.exclude_guest)
-   return -EINVAL;
-
if (event->cpu < 0)
return -EINVAL;

@@ -748,15 +739,6 @@ static int core_imc_event_init(struct perf_event *event)
if (event->hw.sample_period)
return -EINVAL;

-   /* unsupported modes and filters */
-   if (event->attr.exclude_user   ||
-   event->attr.exclude_kernel ||
-   event->attr.exclude_hv ||
-   event->attr.exclude_idle   ||
-   event->attr.exclude_host   ||
-   event->attr.exclude_guest)
-   return -EINVAL;
-
if (event->cpu < 0)
return -EINVAL;

@@ -1069,6 +1051,7 @@ static int update_pmu_ops(struct imc_pmu *pmu)
pmu->pmu.stop = imc_event_stop;
pmu->pmu.read = imc_event_update;
pmu->pmu.attr_groups = pmu->attr_groups;
+   pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;

switch (pmu->domain) {




[PATCH v2] powerpc/perf: Add mem access events to sysfs

2018-12-09 Thread Madhavan Srinivasan
Add mem-loads/mem-stores events to sysfs.
The event is formed based on raw event encoding.
The primary PMU event used here is PM_MRK_INST_CMPL,
along with the MMCRA[SM] modes and the thresholding bit.

Signed-off-by: Madhavan Srinivasan 
---
Changelog v1:
- Added comments on the MMCRA/Raw-event encoding bits set.

 arch/powerpc/perf/power9-events-list.h | 24 
 arch/powerpc/perf/power9-pmu.c |  4 
 2 files changed, 28 insertions(+)

diff --git a/arch/powerpc/perf/power9-events-list.h 
b/arch/powerpc/perf/power9-events-list.h
index 7de344b7d9cc..063c9d9f2516 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -97,3 +97,27 @@ EVENT(PM_MRK_DTLB_MISS_64K,  0x3d156)
 EVENT(PM_DTLB_MISS_16M,0x4c056)
 EVENT(PM_DTLB_MISS_1G, 0x4c05a)
 EVENT(PM_MRK_DTLB_MISS_16M,0x4c15e)
+
+/*
+ * Memory Access Events
+ *
+ * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0)
+ * To enable capturing of memory profiling, these MMCRA bits
+ * needs to be programmed and corresponding raw event format
+ * encoding.
+ *
+ * MMCRA bits encoding needed are
+ * SM (Sampling Mode)
+ * EM (Eligibility for Random Sampling)
+ * TECE (Threshold Event Counter Event)
+ * TS (Threshold Start Event)
+ * TE (Threshold End Event)
+ *
+ * Corresponding Raw Encoding bits:
+ * sample [EM,SM]
+ * thresh_sel (TECE)
+ * thresh start (TS)
+ * thresh end (TE)
+ */
+EVENT(MEM_LOADS,   0x34340401e0)
+EVENT(MEM_STORES,  0x343c0401e0)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index e012b1030a5b..a722f25e5d87 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -168,6 +168,8 @@ GENERIC_EVENT_ATTR(branch-instructions, 
PM_BR_CMPL);
 GENERIC_EVENT_ATTR(branch-misses,  PM_BR_MPRED_CMPL);
 GENERIC_EVENT_ATTR(cache-references,   PM_LD_REF_L1);
 GENERIC_EVENT_ATTR(cache-misses,   PM_LD_MISS_L1_FIN);
+GENERIC_EVENT_ATTR(mem-loads,  MEM_LOADS);
+GENERIC_EVENT_ATTR(mem-stores, MEM_STORES);
 
 CACHE_EVENT_ATTR(L1-dcache-load-misses,PM_LD_MISS_L1_FIN);
 CACHE_EVENT_ATTR(L1-dcache-loads,  PM_LD_REF_L1);
@@ -195,6 +197,8 @@ static struct attribute *power9_events_attr[] = {
GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
GENERIC_EVENT_PTR(PM_LD_REF_L1),
GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN),
+   GENERIC_EVENT_PTR(MEM_LOADS),
+   GENERIC_EVENT_PTR(MEM_STORES),
CACHE_EVENT_PTR(PM_LD_MISS_L1_FIN),
CACHE_EVENT_PTR(PM_LD_REF_L1),
CACHE_EVENT_PTR(PM_L1_PREF),
-- 
2.7.4



[PATCH V2 8/8] KVM: PPC: Book3S HV: Allow passthrough of an emulated device to an L3 guest

2018-12-09 Thread Suraj Jitindar Singh
Previously when a device was being emulated by an L1 guest for an L2
guest, that device couldn't then be passed through to an L3 guest. This
was because the L1 guest had no method for accessing L3 memory.

The hcall H_COPY_TOFROM_GUEST provides this access. Thus this setup for
passthrough can now be allowed.

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 9 -
 arch/powerpc/kvm/book3s_hv_nested.c| 5 -
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index da89d10e5886..cf16e9d207a5 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -37,11 +37,10 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int 
pid,
int old_pid, old_lpid;
bool is_load = !!to;
 
-   /* Can't access quadrants 1 or 2 in non-HV mode */
-   if (kvmhv_on_pseries()) {
-   /* TODO h-call */
-   return -EPERM;
-   }
+   /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */
+   if (kvmhv_on_pseries())
+   return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr,
+ to, from, n);
 
quadrant = 1;
if (!pid)
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index f54301fcfbe4..acde90eb56f7 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -1284,11 +1284,6 @@ static long int __kvmhv_nested_page_fault(struct kvm_run 
*run,
}
 
/* passthrough of emulated MMIO case */
-   if (kvmhv_on_pseries()) {
-   pr_err("emulated MMIO passthrough?\n");
-   return -EINVAL;
-   }
-
return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
}
if (memslot->flags & KVM_MEM_READONLY) {
-- 
2.13.6



[PATCH V2 7/8] KVM: PPC: Introduce new hcall H_COPY_TOFROM_GUEST to access quadrants 1 & 2

2018-12-09 Thread Suraj Jitindar Singh
A guest cannot access quadrants 1 or 2 as this would result in an
exception. Thus introduce the hcall H_COPY_TOFROM_GUEST to be used by a
guest when it wants to perform an access to quadrants 1 or 2, for
example when it wants to access memory for one of its nested guests.

Also provide an implementation for the kvm-hv module.

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/include/asm/hvcall.h  |  1 +
 arch/powerpc/include/asm/kvm_book3s.h  |  4 ++
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  7 ++--
 arch/powerpc/kvm/book3s_hv.c   |  6 ++-
 arch/powerpc/kvm/book3s_hv_nested.c| 75 ++
 5 files changed, 89 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index 33a4fc891947..463c63a9fcf1 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -335,6 +335,7 @@
 #define H_SET_PARTITION_TABLE  0xF800
 #define H_ENTER_NESTED 0xF804
 #define H_TLB_INVALIDATE   0xF808
+#define H_COPY_TOFROM_GUEST0xF80C
 
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR  1
diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index ea94110bfde4..720483733bb2 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -188,6 +188,9 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, 
unsigned long hc);
 extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr);
+extern unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
+   gva_t eaddr, void *to, void *from,
+   unsigned long n);
 extern long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
void *to, unsigned long n);
 extern long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
@@ -302,6 +305,7 @@ long kvmhv_nested_init(void);
 void kvmhv_nested_exit(void);
 void kvmhv_vm_nested_init(struct kvm *kvm);
 long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
+long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu);
 void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
 void kvmhv_release_all_nested(struct kvm *kvm);
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index e1e3ef710bd0..da89d10e5886 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -29,9 +29,9 @@
  */
 static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
 
-static unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
-   gva_t eaddr, void *to, void *from,
-   unsigned long n)
+unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
+ gva_t eaddr, void *to, void *from,
+ unsigned long n)
 {
unsigned long quadrant, ret = n;
int old_pid, old_lpid;
@@ -82,6 +82,7 @@ static unsigned long __kvmhv_copy_tofrom_guest_radix(int 
lpid, int pid,
 
return ret;
 }
+EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix);
 
 static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
  void *to, void *from, unsigned long n)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2280bc4778f5..bd07f9b7c5e8 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -996,7 +996,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (nesting_enabled(vcpu->kvm))
ret = kvmhv_do_nested_tlbie(vcpu);
break;
-
+   case H_COPY_TOFROM_GUEST:
+   ret = H_FUNCTION;
+   if (nesting_enabled(vcpu->kvm))
+   ret = kvmhv_copy_tofrom_guest_nested(vcpu);
+   break;
default:
return RESUME_HOST;
}
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index 991f40ce4eea..f54301fcfbe4 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -462,6 +462,81 @@ long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
 }
 
 /*
+ * Handle the H_COPY_TOFROM_GUEST hcall.
+ * r4 = L1 lpid of nested guest
+ * r5 = pid
+ * r6 = eaddr to access
+ * r7 = to buffer (L1 gpa)
+ * r8 = from buffer (L1 gpa)
+ * r9 = n bytes to copy
+ */
+long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
+{
+   struct kvm_nested_guest *gp;
+   int l1_lpid = kvmppc_get_gpr(vcpu, 4);
+   int pid = kvmppc_get_gpr(vcpu, 5);
+   gva_t eaddr 

[PATCH V2 6/8] KVM: PPC: Book3S HV: Allow passthrough of an emulated device to an L2 guest

2018-12-09 Thread Suraj Jitindar Singh
Allow for a device which is being emulated at L0 (the host) for an L1
guest to be passed through to a nested (L2) guest.

The existing kvmppc_hv_emulate_mmio function can be used here. The main
challenge is that for a load the result must be stored into the L2 gpr,
not an L1 gpr as would normally be the case after going out to qemu to
complete the operation. This presents a challenge as at this point the
L2 gpr state has been written back into L1 memory.

To work around this we store the address in L1 memory of the L2 gpr
where the result of the load is to be stored and use the new io_gpr
value KVM_MMIO_REG_NESTED_GPR to indicate that this is a nested load for
which completion must be done when returning back into the kernel. Then
in kvmppc_complete_mmio_load() the resultant value is written into L1
memory at the location of the indicated L2 gpr.

Note that we don't currently let an L1 guest emulate a device for an L2
guest which is then passed through to an L3 guest.

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/include/asm/kvm_book3s.h |  2 +-
 arch/powerpc/include/asm/kvm_host.h   |  3 +++
 arch/powerpc/kvm/book3s_hv.c  | 12 ++
 arch/powerpc/kvm/book3s_hv_nested.c   | 43 ++-
 arch/powerpc/kvm/powerpc.c|  6 +
 5 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 5883fcce7009..ea94110bfde4 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -311,7 +311,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu,
 void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
 void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
   struct hv_guest_state *hr);
-long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu);
+long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu);
 
 void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index fac6f631ed29..7a2483a139cf 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -793,6 +793,7 @@ struct kvm_vcpu_arch {
/* For support of nested guests */
struct kvm_nested_guest *nested;
u32 nested_vcpu_id;
+   gpa_t nested_io_gpr;
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
@@ -827,6 +828,8 @@ struct kvm_vcpu_arch {
 #define KVM_MMIO_REG_FQPR  0x00c0
 #define KVM_MMIO_REG_VSX   0x0100
 #define KVM_MMIO_REG_VMX   0x0180
+#define KVM_MMIO_REG_NESTED_GPR0xffc0
+
 
 #define __KVM_HAVE_ARCH_WQP
 #define __KVM_HAVE_CREATE_DEVICE
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8a0921176a60..2280bc4778f5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -985,6 +985,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_set_gpr(vcpu, 3, 0);
vcpu->arch.hcall_needed = 0;
return -EINTR;
+   } else if (ret == H_TOO_HARD) {
+   kvmppc_set_gpr(vcpu, 3, 0);
+   vcpu->arch.hcall_needed = 0;
+   return RESUME_HOST;
}
break;
case H_TLB_INVALIDATE:
@@ -1336,7 +1340,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, 
struct kvm_vcpu *vcpu,
return r;
 }
 
-static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
+static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu 
*vcpu)
 {
int r;
int srcu_idx;
@@ -1394,7 +1398,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu 
*vcpu)
 */
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-   r = kvmhv_nested_page_fault(vcpu);
+   r = kvmhv_nested_page_fault(run, vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
break;
case BOOK3S_INTERRUPT_H_INST_STORAGE:
@@ -1404,7 +1408,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu 
*vcpu)
if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-   r = kvmhv_nested_page_fault(vcpu);
+   r = kvmhv_nested_page_fault(run, vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
break;
 
@@ -4059,7 +4063,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
if (!nested)
r = kvmppc_handle_exit_hv(kvm_run, vcpu, current);
else
-   r = kvmppc_handle_nested_exit(vcpu);
+   r = kvmppc_handle_nested_exit(kvm_run, vcpu);
}

[PATCH V2 5/8] KVM: PPC: Update kvmppc_st and kvmppc_ld to use quadrants

2018-12-09 Thread Suraj Jitindar Singh
The functions kvmppc_st and kvmppc_ld are used to access guest memory
from the host using a guest effective address. They do so by translating
through the process table to obtain a guest real address and then using
kvm_read_guest or kvm_write_guest to make the access with the guest real
address.

This method of access, however, only works for L1 guests and will give
incorrect results for a nested guest.

We can, however, use the store_to_eaddr and load_from_eaddr kvmppc_ops to
perform the access for a nested guest (and an L1 guest). So attempt this
method first, and fall back to the old method if this fails and we aren't
running a nested guest.

At this stage there is no fall back method to perform the access for a
nested guest and this is left as a future improvement. For now we will
return to the nested guest and rely on the fact that a translation
should be faulted in before retrying the access.

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/kvm/powerpc.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 95859c53a5cd..cb029fcab404 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -331,10 +331,17 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int 
size, void *ptr,
 {
ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
struct kvmppc_pte pte;
-   int r;
+   int r = -EINVAL;
 
vcpu->stat.st++;
 
+   if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->store_to_eaddr)
+   r = vcpu->kvm->arch.kvm_ops->store_to_eaddr(vcpu, eaddr, ptr,
+   size);
+
+   if ((!r) || (r == -EAGAIN))
+   return r;
+
r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
 XLATE_WRITE, &pte);
if (r < 0)
@@ -367,10 +374,17 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int 
size, void *ptr,
 {
ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
struct kvmppc_pte pte;
-   int rc;
+   int rc = -EINVAL;
 
vcpu->stat.ld++;
 
+   if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->load_from_eaddr)
+   rc = vcpu->kvm->arch.kvm_ops->load_from_eaddr(vcpu, eaddr, ptr,
+ size);
+
+   if ((!rc) || (rc == -EAGAIN))
+   return rc;
+
rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
  XLATE_READ, &pte);
if (rc)
-- 
2.13.6



[PATCH V2 4/8] KVM: PPC: Add load_from_eaddr and store_to_eaddr to the kvmppc_ops struct

2018-12-09 Thread Suraj Jitindar Singh
The kvmppc_ops struct is used to store function pointers to kvm
implementation specific functions.

Introduce two new functions load_from_eaddr and store_to_eaddr to be
used to load from and store to a guest effective address respectively.

Also implement these for the kvm-hv module. If we are using the radix
mmu then we can call the functions to access quadrant 1 and 2.

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/include/asm/kvm_ppc.h |  4 
 arch/powerpc/kvm/book3s_hv.c   | 40 ++
 2 files changed, 44 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 9b89b1918dfc..159dd76700cb 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -326,6 +326,10 @@ struct kvmppc_ops {
unsigned long flags);
void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr);
int (*enable_nested)(struct kvm *kvm);
+   int (*load_from_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+  int size);
+   int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+ int size);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a56f8413758a..8a0921176a60 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -5214,6 +5214,44 @@ static int kvmhv_enable_nested(struct kvm *kvm)
return 0;
 }
 
+static int kvmhv_load_from_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void 
*ptr,
+int size)
+{
+   int rc = -EINVAL;
+
+   if (kvmhv_vcpu_is_radix(vcpu)) {
+   rc = kvmhv_copy_from_guest_radix(vcpu, *eaddr, ptr, size);
+
+   if (rc > 0)
+   rc = -EINVAL;
+   }
+
+   /* For now quadrants are the only way to access nested guest memory */
+   if (rc && vcpu->arch.nested)
+   rc = -EAGAIN;
+
+   return rc;
+}
+
+static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+   int size)
+{
+   int rc = -EINVAL;
+
+   if (kvmhv_vcpu_is_radix(vcpu)) {
+   rc = kvmhv_copy_to_guest_radix(vcpu, *eaddr, ptr, size);
+
+   if (rc > 0)
+   rc = -EINVAL;
+   }
+
+   /* For now quadrants are the only way to access nested guest memory */
+   if (rc && vcpu->arch.nested)
+   rc = -EAGAIN;
+
+   return rc;
+}
+
 static struct kvmppc_ops kvm_ops_hv = {
.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -5254,6 +5292,8 @@ static struct kvmppc_ops kvm_ops_hv = {
.get_rmmu_info = kvmhv_get_rmmu_info,
.set_smt_mode = kvmhv_set_smt_mode,
.enable_nested = kvmhv_enable_nested,
+   .load_from_eaddr = kvmhv_load_from_eaddr,
+   .store_to_eaddr = kvmhv_store_to_eaddr,
 };
 
 static int kvm_init_subcore_bitmap(void)
-- 
2.13.6



[PATCH V2 3/8] KVM: PPC: Book3S HV: Implement functions to access quadrants 1 & 2

2018-12-09 Thread Suraj Jitindar Singh
The POWER9 radix mmu has the concept of quadrants. The quadrant number
is the two high bits of the effective address and determines the fully
qualified address to be used for the translation. The fully qualified
address consists of the effective lpid, the effective pid and the
effective address. This then gives 4 possible quadrants: 0, 1, 2, and 3.

When accessing these quadrants the fully qualified address is obtained
as follows:

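Quadrant | Hypervisor        | Guest
---------+-------------------+------------------
         | EA[0:1] = 0b00    | EA[0:1] = 0b00
0        | effLPID = 0       | effLPID = LPIDR
         | effPID  = PIDR    | effPID  = PIDR
---------+-------------------+------------------
         | EA[0:1] = 0b01    |
1        | effLPID = LPIDR   | Invalid Access
         | effPID  = PIDR    |
---------+-------------------+------------------
         | EA[0:1] = 0b10    |
2        | effLPID = LPIDR   | Invalid Access
         | effPID  = 0       |
---------+-------------------+------------------
         | EA[0:1] = 0b11    | EA[0:1] = 0b11
3        | effLPID = 0       | effLPID = LPIDR
         | effPID  = 0       | effPID  = 0
---------+-------------------+------------------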

In the Guest:
Quadrant 3 is normally used to address the operating system since this
uses effPID=0 and effLPID=LPIDR, meaning the PID register doesn't need to
be switched.
Quadrant 0 is normally used to address user space since the effLPID and
effPID are taken from the corresponding registers.

In the Host:
Quadrant 0 and 3 are used as above, however the effLPID is always 0 to
address the host.

Quadrants 1 and 2 can be used by the host to address guest memory using
a guest effective address. Since the effLPID comes from the LPID register,
the host loads the LPID of the guest it would like to access (and the
PID of the process) and can perform accesses to a guest effective
address.

This means quadrant 1 can be used to address the guest user space and
quadrant 2 can be used to address the guest operating system from the
hypervisor, using a guest effective address.

Access to the quadrants can cause a Hypervisor Data Storage Interrupt
(HDSI) due to being unable to perform partition scoped translation.
Previously this could only be generated from a guest and so the code
path expects us to take the KVM trampoline in the interrupt handler.
This is no longer the case so we modify the handler to call
bad_page_fault() to check if we were expecting this fault so we can
handle it gracefully and just return with an error code. In the hash mmu
case we still raise an unknown exception since quadrants aren't defined
for the hash mmu.

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/include/asm/kvm_book3s.h  |  4 ++
 arch/powerpc/kernel/exceptions-64s.S   |  9 
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 97 ++
 arch/powerpc/mm/fault.c|  1 +
 4 files changed, 111 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 09f8e9ba69bc..5883fcce7009 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -188,6 +188,10 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm 
*kvm, unsigned long hc);
 extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run,
struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr);
+extern long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+   void *to, unsigned long n);
+extern long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+ void *from, unsigned long n);
 extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
  struct kvmppc_pte *gpte, u64 root,
  u64 *pte_ret_p);
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 89d32bb79d5e..db2691ff4c0b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -995,7 +995,16 @@ EXC_COMMON_BEGIN(h_data_storage_common)
bl  save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addir3,r1,STACK_FRAME_OVERHEAD
+BEGIN_MMU_FTR_SECTION
+   ld  r4,PACA_EXGEN+EX_DAR(r13)
+   lwz r5,PACA_EXGEN+EX_DSISR(r13)
+   std r4,_DAR(r1)
+   std r5,_DSISR(r1)
+   li  r5,SIGSEGV
+   bl  bad_page_fault
+MMU_FTR_SECTION_ELSE
bl  unknown_exception

[PATCH V2 2/8] KVM: PPC: Book3S HV: Add function kvmhv_vcpu_is_radix()

2018-12-09 Thread Suraj Jitindar Singh
There exists a function kvm_is_radix() which is used to determine if a
kvm instance is using the radix mmu. However this only applies to the
first level (L1) guest. Add a function kvmhv_vcpu_is_radix() which can
be used to determine if the current execution context of the vcpu is
radix, accounting for if the vcpu is running a nested guest.

Currently all nested guests must be radix but this may change in the
future.

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/include/asm/kvm_book3s_64.h | 13 +
 arch/powerpc/kvm/book3s_hv_nested.c  |  1 +
 2 files changed, 14 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 6d298145d564..7a9e472f2872 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -55,6 +55,7 @@ struct kvm_nested_guest {
cpumask_t need_tlb_flush;
cpumask_t cpu_in_guest;
short prev_cpu[NR_CPUS];
+   u8 radix;   /* is this nested guest radix */
 };
 
 /*
@@ -150,6 +151,18 @@ static inline bool kvm_is_radix(struct kvm *kvm)
return kvm->arch.radix;
 }
 
+static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
+{
+   bool radix;
+
+   if (vcpu->arch.nested)
+   radix = vcpu->arch.nested->radix;
+   else
+   radix = kvm_is_radix(vcpu->kvm);
+
+   return radix;
+}
+
 #define KVM_DEFAULT_HPT_ORDER  24  /* 16MB HPT by default */
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index 401d2ecbebc5..4fca462e54c4 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -480,6 +480,7 @@ struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm 
*kvm, unsigned int lpid)
if (shadow_lpid < 0)
goto out_free2;
gp->shadow_lpid = shadow_lpid;
+   gp->radix = 1;
 
memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));
 
-- 
2.13.6



[PATCH V2 1/8] KVM: PPC: Only report KVM_CAP_SPAPR_TCE_VFIO on powernv machines

2018-12-09 Thread Suraj Jitindar Singh
The kvm capability KVM_CAP_SPAPR_TCE_VFIO is used to indicate the
availability of in kernel tce acceleration for vfio. However it is
currently the case that this is only available on a powernv machine,
not for a pseries machine.

Thus make this capability dependent on having the cpu feature
CPU_FTR_HVMODE.

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/kvm/powerpc.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 2869a299c4ed..95859c53a5cd 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -496,6 +496,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int r;
/* Assume we're using HV mode when the HV module is loaded */
int hv_enabled = kvmppc_hv_ops ? 1 : 0;
+   int kvm_on_pseries = !cpu_has_feature(CPU_FTR_HVMODE);
 
if (kvm) {
/*
@@ -543,8 +544,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 #ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_SPAPR_TCE_64:
-   /* fallthrough */
+   r = 1;
+   break;
case KVM_CAP_SPAPR_TCE_VFIO:
+   r = !kvm_on_pseries;
+   break;
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
case KVM_CAP_PPC_ENABLE_HCALL:
-- 
2.13.6



[PATCH V2 0/8] KVM: PPC: Implement passthrough of emulated devices for nested guests

2018-12-09 Thread Suraj Jitindar Singh
This patch series allows for emulated devices to be passed through to nested
guests, irrespective of at which level the device is being emulated.

Note that the emulated device must be using dma, not virtio.

For example, passing through an emulated e1000:

1. Emulate the device at L(n) for L(n+1)

qemu-system-ppc64 -netdev type=user,id=net0 -device e1000,netdev=net0

2. Assign the VFIO-PCI driver at L(n+1)

echo vfio-pci > /sys/bus/pci/devices/0000:00:00.0/driver_override
echo 0000:00:00.0 > /sys/bus/pci/drivers/e1000/unbind
echo 0000:00:00.0 > /sys/bus/pci/drivers/vfio-pci/bind
chmod 666 /dev/vfio/0

3. Pass the device through from L(n+1) to L(n+2)

qemu-system-ppc64 -device vfio-pci,host=0000:00:00.0

4. L(n+2) can now access the device which will be emulated at L(n)

V1 -> V2:
1/8: None
2/8: None
3/8: None
4/8: None
5/8: None
6/8: Account for L1 differing in endianness in kvmppc_complete_mmio_load()
7/8: None
8/8: None

Suraj Jitindar Singh (8):
  KVM: PPC: Only report KVM_CAP_SPAPR_TCE_VFIO on powernv machines
  KVM: PPC: Book3S HV: Add function kvmhv_vcpu_is_radix()
  KVM: PPC: Book3S HV: Implement functions to access quadrants 1 & 2
  KVM: PPC: Add load_from_eaddr and store_to_eaddr to the kvmppc_ops
struct
  KVM: PPC: Update kvmppc_st and kvmppc_ld to use quadrants
  KVM: PPC: Book3S HV: Allow passthrough of an emulated device to an L2
guest
  KVM: PPC: Introduce new hcall H_COPY_TOFROM_GUEST to access quadrants
1 & 2
  KVM: PPC: Book3S HV: Allow passthrough of an emulated device to an L3
guest

 arch/powerpc/include/asm/hvcall.h|   1 +
 arch/powerpc/include/asm/kvm_book3s.h|  10 ++-
 arch/powerpc/include/asm/kvm_book3s_64.h |  13 
 arch/powerpc/include/asm/kvm_host.h  |   3 +
 arch/powerpc/include/asm/kvm_ppc.h   |   4 ++
 arch/powerpc/kernel/exceptions-64s.S |   9 +++
 arch/powerpc/kvm/book3s_64_mmu_radix.c   |  97 ++
 arch/powerpc/kvm/book3s_hv.c |  58 ++--
 arch/powerpc/kvm/book3s_hv_nested.c  | 114 +--
 arch/powerpc/kvm/powerpc.c   |  30 +++-
 arch/powerpc/mm/fault.c  |   1 +
 11 files changed, 325 insertions(+), 15 deletions(-)

-- 
2.13.6



Re: [PATCH v3 09/12] powerpc: perf/core: use PERF_PMU_CAP_NO_EXCLUDE for exclude incapable PMUs

2018-12-09 Thread Michael Ellerman
Andrew Murray  writes:

> For PowerPC PMUs that do not support context exclusion let's
> advertise the PERF_PMU_CAP_NO_EXCLUDE capability. This ensures that
> perf will prevent us from handling events where any exclusion flags
> are set. Let's also remove the now unnecessary check for exclusion
> flags.
>
> Signed-off-by: Andrew Murray 
> ---
>  arch/powerpc/perf/hv-24x7.c | 10 +-
>  arch/powerpc/perf/hv-gpci.c | 10 +-
>  arch/powerpc/perf/imc-pmu.c | 19 +--
>  3 files changed, 3 insertions(+), 36 deletions(-)

Looks good.

Acked-by: Michael Ellerman  (powerpc)

cheers

> diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
> index 72238ee..d2b8e60 100644
> --- a/arch/powerpc/perf/hv-24x7.c
> +++ b/arch/powerpc/perf/hv-24x7.c
> @@ -1306,15 +1306,6 @@ static int h_24x7_event_init(struct perf_event *event)
>   return -EINVAL;
>   }
>  
> - /* unsupported modes and filters */
> - if (event->attr.exclude_user   ||
> - event->attr.exclude_kernel ||
> - event->attr.exclude_hv ||
> - event->attr.exclude_idle   ||
> - event->attr.exclude_host   ||
> - event->attr.exclude_guest)
> - return -EINVAL;
> -
>   /* no branch sampling */
>   if (has_branch_stack(event))
>   return -EOPNOTSUPP;
> @@ -1577,6 +1568,7 @@ static struct pmu h_24x7_pmu = {
>   .start_txn   = h_24x7_event_start_txn,
>   .commit_txn  = h_24x7_event_commit_txn,
>   .cancel_txn  = h_24x7_event_cancel_txn,
> + .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
>  };
>  
>  static int hv_24x7_init(void)
> diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
> index 43fabb3..735e77b 100644
> --- a/arch/powerpc/perf/hv-gpci.c
> +++ b/arch/powerpc/perf/hv-gpci.c
> @@ -232,15 +232,6 @@ static int h_gpci_event_init(struct perf_event *event)
>   return -EINVAL;
>   }
>  
> - /* unsupported modes and filters */
> - if (event->attr.exclude_user   ||
> - event->attr.exclude_kernel ||
> - event->attr.exclude_hv ||
> - event->attr.exclude_idle   ||
> - event->attr.exclude_host   ||
> - event->attr.exclude_guest)
> - return -EINVAL;
> -
>   /* no branch sampling */
>   if (has_branch_stack(event))
>   return -EOPNOTSUPP;
> @@ -285,6 +276,7 @@ static struct pmu h_gpci_pmu = {
>   .start   = h_gpci_event_start,
>   .stop= h_gpci_event_stop,
>   .read= h_gpci_event_update,
> + .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
>  };
>  
>  static int hv_gpci_init(void)
> diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
> index 1fafc32b..1dbb0ee 100644
> --- a/arch/powerpc/perf/imc-pmu.c
> +++ b/arch/powerpc/perf/imc-pmu.c
> @@ -473,15 +473,6 @@ static int nest_imc_event_init(struct perf_event *event)
>   if (event->hw.sample_period)
>   return -EINVAL;
>  
> - /* unsupported modes and filters */
> - if (event->attr.exclude_user   ||
> - event->attr.exclude_kernel ||
> - event->attr.exclude_hv ||
> - event->attr.exclude_idle   ||
> - event->attr.exclude_host   ||
> - event->attr.exclude_guest)
> - return -EINVAL;
> -
>   if (event->cpu < 0)
>   return -EINVAL;
>  
> @@ -748,15 +739,6 @@ static int core_imc_event_init(struct perf_event *event)
>   if (event->hw.sample_period)
>   return -EINVAL;
>  
> - /* unsupported modes and filters */
> - if (event->attr.exclude_user   ||
> - event->attr.exclude_kernel ||
> - event->attr.exclude_hv ||
> - event->attr.exclude_idle   ||
> - event->attr.exclude_host   ||
> - event->attr.exclude_guest)
> - return -EINVAL;
> -
>   if (event->cpu < 0)
>   return -EINVAL;
>  
> @@ -1069,6 +1051,7 @@ static int update_pmu_ops(struct imc_pmu *pmu)
>   pmu->pmu.stop = imc_event_stop;
>   pmu->pmu.read = imc_event_update;
>   pmu->pmu.attr_groups = pmu->attr_groups;
> + pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
>   pmu->attr_groups[IMC_FORMAT_ATTR] = _format_group;
>  
>   switch (pmu->domain) {
> -- 
> 2.7.4


Re: [PATCH kernel v4 04/19] powerpc/powernv: Move npu struct from pnv_phb to pci_controller

2018-12-09 Thread Alexey Kardashevskiy



On 06/12/2018 09:40, David Gibson wrote:
> On Wed, Dec 05, 2018 at 05:17:57PM +1100, Alexey Kardashevskiy wrote:
>>
>>
>> On 05/12/2018 16:47, Alexey Kardashevskiy wrote:
>>>
>>>
>>> On 05/12/2018 16:14, David Gibson wrote:
 On Fri, Nov 23, 2018 at 04:52:49PM +1100, Alexey Kardashevskiy wrote:
> The powernv PCI code stores NPU data in the pnv_phb struct. The latter
> is referenced by pci_controller::private_data. We are going to have NPU2
> support in the pseries platform as well but it does not store any
> private_data in the pci_controller struct; and even if it did,
> it would be a different data structure.
>
> This makes npu a pointer and stores it one level higher in
> the pci_controller struct.
>
> Signed-off-by: Alexey Kardashevskiy 
> ---
> Changes:
> v4:
> * changed subj from "powerpc/powernv: Detach npu struct from pnv_phb"
> * got rid of global list of npus - store them now in pci_controller
> * got rid of npdev_to_npu() helper
> ---
>  arch/powerpc/include/asm/pci-bridge.h|  1 +
>  arch/powerpc/platforms/powernv/pci.h | 16 -
>  arch/powerpc/platforms/powernv/npu-dma.c | 81 ++--
>  3 files changed, 64 insertions(+), 34 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/pci-bridge.h 
> b/arch/powerpc/include/asm/pci-bridge.h
> index 94d4490..aee4fcc 100644
> --- a/arch/powerpc/include/asm/pci-bridge.h
> +++ b/arch/powerpc/include/asm/pci-bridge.h
> @@ -129,6 +129,7 @@ struct pci_controller {
>  #endif   /* CONFIG_PPC64 */
>  
>   void *private_data;
> + struct npu *npu;
>  };
>  
>  /* These are used for config access before all the PCI probing
> diff --git a/arch/powerpc/platforms/powernv/pci.h 
> b/arch/powerpc/platforms/powernv/pci.h
> index 2131373..f2d50974 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -8,9 +8,6 @@
>  
>  struct pci_dn;
>  
> -/* Maximum possible number of ATSD MMIO registers per NPU */
> -#define NV_NMMU_ATSD_REGS 8
> -
>  enum pnv_phb_type {
>   PNV_PHB_IODA1   = 0,
>   PNV_PHB_IODA2   = 1,
> @@ -176,19 +173,6 @@ struct pnv_phb {
>   unsigned intdiag_data_size;
>   u8  *diag_data;
>  
> - /* Nvlink2 data */
> - struct npu {
> - int index;
> - __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
> - unsigned int mmio_atsd_count;
> -
> - /* Bitmask for MMIO register usage */
> - unsigned long mmio_atsd_usage;
> -
> - /* Do we need to explicitly flush the nest mmu? */
> - bool nmmu_flush;
> - } npu;
> -
>   int p2p_target_count;
>  };
>  
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c 
> b/arch/powerpc/platforms/powernv/npu-dma.c
> index 91d488f..7dd5c0e5 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -327,6 +327,25 @@ struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct 
> pnv_ioda_pe *npe)
>   return gpe;
>  }
>  
> +/*
> + * NPU2 ATS
> + */
> +/* Maximum possible number of ATSD MMIO registers per NPU */
> +#define NV_NMMU_ATSD_REGS 8
> +
> +/* An NPU descriptor, valid for POWER9 only */
> +struct npu {
> + int index;
> + __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
> + unsigned int mmio_atsd_count;
> +
> + /* Bitmask for MMIO register usage */
> + unsigned long mmio_atsd_usage;
> +
> + /* Do we need to explicitly flush the nest mmu? */
> + bool nmmu_flush;
> +};
> +
>  /* Maximum number of nvlinks per npu */
>  #define NV_MAX_LINKS 6
>  
> @@ -478,7 +497,6 @@ static void acquire_atsd_reg(struct npu_context 
> *npu_context,
>   int i, j;
>   struct npu *npu;
>   struct pci_dev *npdev;
> - struct pnv_phb *nphb;
>  
>   for (i = 0; i <= max_npu2_index; i++) {
>   mmio_atsd_reg[i].reg = -1;
> @@ -493,8 +511,10 @@ static void acquire_atsd_reg(struct npu_context 
> *npu_context,
>   if (!npdev)
>   continue;
>  
> - nphb = pci_bus_to_host(npdev->bus)->private_data;
> - npu = >npu;
> + npu = pci_bus_to_host(npdev->bus)->npu;
> + if (!npu)
> + continue;

 This patch changes a bunch of places that used to unconditionally
 locate an NPU so that they now have a failure path.

 Given that this used to always have an NPU, doesn't that mean that if
 the NPU is not present something has already gone wrong, and we should
 WARN_ON() or something?
>>>
>>>
>>>
>>> That means this is a leftover since I dropped 

Re: powerpc/boot: Fix build failures with -j 1

2018-12-09 Thread Michael Ellerman
On Thu, 2018-12-06 at 05:10:28 UTC, Michael Ellerman wrote:
> In commit 5e9dcb6188a4 ("powerpc/boot: Expose Kconfig symbols to
> wrapper") we added a dependency to serial.c on autoconf.h:
> 
>   $(obj)/serial.c: $(obj)/autoconf.h
> 
> This works when building in-tree (ie. with KBUILD_OUTPUT unset)
> because the obj tree is the src tree.
> 
> But when building with eg. O=build and -j 1 the build fails:
> 
>   gcc ... -I../arch/powerpc/boot -c -o arch/powerpc/boot/serial.o 
> arch/powerpc/boot/serial.c
>   gcc: error: arch/powerpc/boot/serial.c: No such file or directory
> 
> Why this is only happening with -j 1 is not clear, when building with
> -j greater than 1 somehow we decide to look for serial.c in the src
> tree (../), eg:
> 
>   gcc -I../arch/powerpc/boot -c -o arch/powerpc/boot/serial.o 
> ../arch/powerpc/boot/serial.c
> 
> Regardless we shouldn't be specifying a dependency on serial.c in the
> build tree, we want to add a dependency to the version in $(srctree)
> so fix the rule to say that.
> 
> Fixes: 5e9dcb6188a4 ("powerpc/boot: Expose Kconfig symbols to wrapper")
> Tested-by: Daniel Axtens 
> Signed-off-by: Michael Ellerman 

Applied to powerpc fixes.

https://git.kernel.org/powerpc/c/e41b93a6be57e26a4a123345f826a6

cheers


Re: [1/7] powerpc/papr_scm: Use depend instead of select

2018-12-09 Thread Michael Ellerman
On Thu, 2018-12-06 at 15:17:08 UTC, Oliver O'Halloran wrote:
> Making PAPR_SCM select LIBNVDIMM results in circular dependencies in
> Kconfig when another symbol depends on it. Fix this by replacing the
> select with a depends.
> 
> Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions")
> Reported-by: Alastair D'Silva 
> Signed-off-by: Oliver O'Halloran 

Series applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/14ebfec0712f66a4ef037fb7ac0df6

cheers


Re: [bpf] bpf: powerpc: fix broken uapi for BPF_PROG_TYPE_PERF_EVENT

2018-12-09 Thread Michael Ellerman
On Thu, 2018-12-06 at 09:27:01 UTC, Sandipan Das wrote:
> Now that there are different variants of pt_regs for userspace and
> kernel, the uapi for the BPF_PROG_TYPE_PERF_EVENT program type must
> be changed by exporting the user_pt_regs structure instead of the
> pt_regs structure that is in-kernel only.
> 
> Fixes: 002af9391bfb ("powerpc: Split user/kernel definitions of struct 
> pt_regs")
> Signed-off-by: Sandipan Das 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/a6460b03f945ee216dbf42a0d9ee78

cheers


Re: [PATCH bpf] bpf: powerpc: fix broken uapi for BPF_PROG_TYPE_PERF_EVENT

2018-12-09 Thread Michael Ellerman
Alexei Starovoitov  writes:

> On Thu, Dec 06, 2018 at 02:57:01PM +0530, Sandipan Das wrote:
>> Now that there are different variants of pt_regs for userspace and
>> kernel, the uapi for the BPF_PROG_TYPE_PERF_EVENT program type must
>> be changed by exporting the user_pt_regs structure instead of the
>> pt_regs structure that is in-kernel only.
>> 
>> Fixes: 002af9391bfb ("powerpc: Split user/kernel definitions of struct 
>> pt_regs")
>> Signed-off-by: Sandipan Das 
>
> Thanks! Applied to bpf tree.

I already have this in the fixes branch of the powerpc tree. I don't
mind if you take it also, git should probably handle the merge OK.

cheers


Re: [PATCH NEXT v2 1/4] powerpc/pasemi: Add PCI initialisation for Nemo board.

2018-12-09 Thread Darren Stevens
Michael,

Any comments on these?

On 19/08/2018, Darren Stevens wrote:
> The A-Eon Amigaone X1000's Nemo motherboard has an AMD SB600
> connected to one of the PCI-e root ports on its PaSemi
> PWRficient 1682M SoC. Normally the SB600 southbridge would be
> connected to a hidden PCI-e port on the system's northbridge,
> and as a result doesn't fully comply with the PCI-e spec.
> 
> Add code to relax the PCI-e detection in both the root port
> and the Linux kernel allowing on board devices to be detected.
> 
> Signed-off-by: Darren Stevens 
>
> ---
>
> Changes made:
>
> v2: Replaced sb600_bus with a define, moved iob_mapbase into 
> sb600_set_flag()
> Created some register/Flag names (as I don't have the docs
> for the PA6T-1682M)



Re: [PATCH bpf] bpf: powerpc: fix broken uapi for BPF_PROG_TYPE_PERF_EVENT

2018-12-09 Thread Alexei Starovoitov
On Thu, Dec 06, 2018 at 02:57:01PM +0530, Sandipan Das wrote:
> Now that there are different variants of pt_regs for userspace and
> kernel, the uapi for the BPF_PROG_TYPE_PERF_EVENT program type must
> be changed by exporting the user_pt_regs structure instead of the
> pt_regs structure that is in-kernel only.
> 
> Fixes: 002af9391bfb ("powerpc: Split user/kernel definitions of struct 
> pt_regs")
> Signed-off-by: Sandipan Das 

Thanks! Applied to bpf tree.



[PATCH v2.1 24/34] dt-bindings: arm: Convert Rockchip board/soc bindings to json-schema

2018-12-09 Thread Heiko Stuebner
Convert Rockchip SoC bindings to DT schema format using json-schema.

Cc: Mark Rutland 
Cc: Heiko Stuebner 
Cc: devicet...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-rockc...@lists.infradead.org
Signed-off-by: Rob Herring 
[move to per-board entries and added recently added boards]
Signed-off-by: Heiko Stuebner 
---
Hi Rob,

there are boards where the description adds much value and on others
it is maybe less, but personally I'd like to keep things uniform,
as that makes reading these things easier if the format stays the
same all the time, so I've gone forward and just did the conversion

make dtbs_check did not complain about the schema it seems but I
did end up with an error later on:

FATAL ERROR: Unknown output format "yaml"
make[2]: *** [scripts/Makefile.lib:313: arch/arm/boot/dts/rk3036-evb.dt.yaml] 
Fehler 1

But I guess I did not mess up the schema yet.

So does it look ok that way?
Heiko

 .../devicetree/bindings/arm/rockchip.txt  | 240 --
 .../devicetree/bindings/arm/rockchip.yaml | 419 ++
 2 files changed, 419 insertions(+), 240 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/rockchip.txt
 create mode 100644 Documentation/devicetree/bindings/arm/rockchip.yaml

diff --git a/Documentation/devicetree/bindings/arm/rockchip.txt 
b/Documentation/devicetree/bindings/arm/rockchip.txt
deleted file mode 100644
index 0cc71236d639..
--- a/Documentation/devicetree/bindings/arm/rockchip.txt
+++ /dev/null
@@ -1,240 +0,0 @@
-Rockchip platforms device tree bindings

-
-- 96boards RK3399 Ficus (ROCK960 Enterprise Edition)
-Required root node properties:
-  - compatible = "vamrs,ficus", "rockchip,rk3399";
-
-- 96boards RK3399 Rock960 (ROCK960 Consumer Edition)
-Required root node properties:
-  - compatible = "vamrs,rock960", "rockchip,rk3399";
-
-- Amarula Vyasa RK3288 board
-Required root node properties:
-  - compatible = "amarula,vyasa-rk3288", "rockchip,rk3288";
-
-- Asus Tinker board
-Required root node properties:
-  - compatible = "asus,rk3288-tinker", "rockchip,rk3288";
-
-- Asus Tinker board S
-Required root node properties:
-  - compatible = "asus,rk3288-tinker-s", "rockchip,rk3288";
-
-- Kylin RK3036 board:
-Required root node properties:
-  - compatible = "rockchip,kylin-rk3036", "rockchip,rk3036";
-
-- MarsBoard RK3066 board:
-Required root node properties:
-  - compatible = "haoyu,marsboard-rk3066", "rockchip,rk3066a";
-
-- bq Curie 2 tablet:
-Required root node properties:
-  - compatible = "mundoreader,bq-curie2", "rockchip,rk3066a";
-
-- ChipSPARK Rayeager PX2 board:
-Required root node properties:
-  - compatible = "chipspark,rayeager-px2", "rockchip,rk3066a";
-
-- Radxa Rock board:
-Required root node properties:
-  - compatible = "radxa,rock", "rockchip,rk3188";
-
-- Radxa Rock2 Square board:
-Required root node properties:
-  - compatible = "radxa,rock2-square", "rockchip,rk3288";
-
-- Rikomagic MK808 v1 board:
-Required root node properties:
-  - compatible = "rikomagic,mk808", "rockchip,rk3066a";
-
-- Firefly Firefly-RK3288 board:
-Required root node properties:
-  - compatible = "firefly,firefly-rk3288", "rockchip,rk3288";
-or
-  - compatible = "firefly,firefly-rk3288-beta", "rockchip,rk3288";
-
-- Firefly Firefly-RK3288 Reload board:
-Required root node properties:
-  - compatible = "firefly,firefly-rk3288-reload", "rockchip,rk3288";
-
-- Firefly Firefly-RK3399 board:
-Required root node properties:
-  - compatible = "firefly,firefly-rk3399", "rockchip,rk3399";
-
-- Firefly roc-rk3328-cc board:
-Required root node properties:
-  - compatible = "firefly,roc-rk3328-cc", "rockchip,rk3328";
-
-- Firefly ROC-RK3399-PC board:
-Required root node properties:
-  - compatible = "firefly,roc-rk3399-pc", "rockchip,rk3399";
-
-- ChipSPARK PopMetal-RK3288 board:
-Required root node properties:
-  - compatible = "chipspark,popmetal-rk3288", "rockchip,rk3288";
-
-- Netxeon R89 board:
-Required root node properties:
-  - compatible = "netxeon,r89", "rockchip,rk3288";
-
-- GeekBuying GeekBox:
-Required root node properties:
-  - compatible = "geekbuying,geekbox", "rockchip,rk3368";
-
-- Google Bob (Asus Chromebook Flip C101PA):
-Required root node properties:
-   compatible = "google,bob-rev13", "google,bob-rev12",
-"google,bob-rev11", "google,bob-rev10",
-"google,bob-rev9", "google,bob-rev8",
-"google,bob-rev7", "google,bob-rev6",
-"google,bob-rev5", "google,bob-rev4",
-"google,bob", "google,gru", "rockchip,rk3399";
-
-- Google Brain (dev-board):
-Required root node properties:
-  - compatible = "google,veyron-brain-rev0", "google,veyron-brain",
-"google,veyron", 

Re: use generic DMA mapping code in powerpc V4

2018-12-09 Thread Christian Zigotzky
Next step: c1bfcad4b0cf38ce5b00f7ad880d3a13484c123a (dma-mapping, 
powerpc: simplify the arch dma_set_mask override)


Result: No problems with the PASEMI onboard ethernet and with booting 
the X5000 (P5020 board).


-- Christian


On 09 December 2018 at 3:20PM, Christian Zigotzky wrote:
Next step: 602307b034734ce77a05da4b99333a2eaf6b6482 (powerpc/fsl_pci: 
simplify fsl_pci_dma_set_mask)


git checkout 602307b034734ce77a05da4b99333a2eaf6b6482

The PASEMI onboard ethernet works and the X5000 boots.

-- Christian


On 08 December 2018 at 2:47PM, Christian Zigotzky wrote:
Next step: e15cd8173ef85e9cc3e2a9c7cc2982f5c1355615 (powerpc/dma: fix 
an off-by-one in dma_capable)


git checkout e15cd8173ef85e9cc3e2a9c7cc2982f5c1355615

The PASEMI onboard ethernet also works with this commit and the X5000 
boots without any problems.


-- Christian


On 08 December 2018 at 11:29AM, Christian Zigotzky wrote:
Next step: 7ebc44c535f6bd726d553756d38b137acc718443 (powerpc/dma: 
remove max_direct_dma_addr)


git checkout 7ebc44c535f6bd726d553756d38b137acc718443

OK, the PASEMI onboard ethernet works and the P5020 board boots.

-- Christian


On 07 December 2018 at 7:33PM, Christian Zigotzky wrote:
Next step: 13c1fdec5682b6e13257277fa16aa31f342d167d (powerpc/dma: 
move pci_dma_dev_setup_swiotlb to fsl_pci.c)


git checkout 13c1fdec5682b6e13257277fa16aa31f342d167d

Result: The PASEMI onboard ethernet works and the P5020 board boots.

— Christian














[PATCH v3 6/6] arm, unicore32: remove early_alloc*() wrappers

2018-12-09 Thread Mike Rapoport
On arm and unicore32, early_alloc_aligned() and early_alloc() are
one-line wrappers for memblock_alloc().

Replace their usage with direct call to memblock_alloc.

Suggested-by: Christoph Hellwig 
Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/mmu.c   | 11 +++
 arch/unicore32/mm/mmu.c | 12 
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 0a04c9a5..57de0dd 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -719,14 +719,9 @@ EXPORT_SYMBOL(phys_mem_access_prot);
 
 #define vectors_base() (vectors_high() ? 0x : 0)
 
-static void __init *early_alloc_aligned(unsigned long sz, unsigned long align)
-{
-   return memblock_alloc(sz, align);
-}
-
 static void __init *early_alloc(unsigned long sz)
 {
-   return early_alloc_aligned(sz, sz);
+   return memblock_alloc(sz, sz);
 }
 
 static void *__init late_alloc(unsigned long sz)
@@ -998,7 +993,7 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
if (!nr)
return;
 
-   svm = early_alloc_aligned(sizeof(*svm) * nr, __alignof__(*svm));
+   svm = memblock_alloc(sizeof(*svm) * nr, __alignof__(*svm));
 
for (md = io_desc; nr; md++, nr--) {
create_mapping(md);
@@ -1020,7 +1015,7 @@ void __init vm_reserve_area_early(unsigned long addr, 
unsigned long size,
struct vm_struct *vm;
struct static_vm *svm;
 
-   svm = early_alloc_aligned(sizeof(*svm), __alignof__(*svm));
+   svm = memblock_alloc(sizeof(*svm), __alignof__(*svm));
 
vm = >vm;
vm->addr = (void *)addr;
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
index 50d8c1a..a402192 100644
--- a/arch/unicore32/mm/mmu.c
+++ b/arch/unicore32/mm/mmu.c
@@ -141,16 +141,12 @@ static void __init build_mem_type_table(void)
 
 #define vectors_base() (vectors_high() ? 0x : 0)
 
-static void __init *early_alloc(unsigned long sz)
-{
-   return memblock_alloc(sz, sz);
-}
-
 static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr,
unsigned long prot)
 {
if (pmd_none(*pmd)) {
-   pte_t *pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
+   pte_t *pte = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t),
+   PTRS_PER_PTE * sizeof(pte_t));
__pmd_populate(pmd, __pa(pte) | prot);
}
BUG_ON(pmd_bad(*pmd));
@@ -352,7 +348,7 @@ static void __init devicemaps_init(void)
/*
 * Allocate the vector page early.
 */
-   vectors = early_alloc(PAGE_SIZE);
+   vectors = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 
for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
pmd_clear(pmd_off_k(addr));
@@ -429,7 +425,7 @@ void __init paging_init(void)
top_pmd = pmd_off_k(0x);
 
/* allocate the zero page. */
-   zero_page = early_alloc(PAGE_SIZE);
+   zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 
bootmem_init();
 
-- 
2.7.4



[PATCH v3 5/6] arch: simplify several early memory allocations

2018-12-09 Thread Mike Rapoport
There are several early memory allocations in arch/ code that use
memblock_phys_alloc() to allocate memory, convert the returned physical
address to the virtual address and then set the allocated memory to zero.

Exactly the same behaviour can be achieved simply by calling
memblock_alloc(): it allocates the memory in the same way as
memblock_phys_alloc(), then it performs the phys_to_virt() conversion and
clears the allocated memory.

Replace the longer sequence with a simpler call to memblock_alloc().

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/mmu.c |  4 +---
 arch/c6x/mm/dma-coherent.c|  9 ++---
 arch/nds32/mm/init.c  | 12 
 arch/powerpc/kernel/setup-common.c|  4 ++--
 arch/powerpc/mm/pgtable_32.c  |  4 +---
 arch/powerpc/mm/ppc_mmu_32.c  |  3 +--
 arch/powerpc/platforms/powernv/opal.c |  3 +--
 arch/sparc/kernel/prom_64.c   |  7 ++-
 arch/sparc/mm/init_64.c   |  9 +++--
 arch/unicore32/mm/mmu.c   |  4 +---
 10 files changed, 18 insertions(+), 41 deletions(-)

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index f5cc1cc..0a04c9a5 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -721,9 +721,7 @@ EXPORT_SYMBOL(phys_mem_access_prot);
 
 static void __init *early_alloc_aligned(unsigned long sz, unsigned long align)
 {
-   void *ptr = __va(memblock_phys_alloc(sz, align));
-   memset(ptr, 0, sz);
-   return ptr;
+   return memblock_alloc(sz, align);
 }
 
 static void __init *early_alloc(unsigned long sz)
diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c
index 01305c7..ffc49e2 100644
--- a/arch/c6x/mm/dma-coherent.c
+++ b/arch/c6x/mm/dma-coherent.c
@@ -118,8 +118,6 @@ void arch_dma_free(struct device *dev, size_t size, void 
*vaddr,
  */
 void __init coherent_mem_init(phys_addr_t start, u32 size)
 {
-   phys_addr_t bitmap_phys;
-
if (!size)
return;
 
@@ -135,11 +133,8 @@ void __init coherent_mem_init(phys_addr_t start, u32 size)
if (dma_size & (PAGE_SIZE - 1))
++dma_pages;
 
-   bitmap_phys = memblock_phys_alloc(BITS_TO_LONGS(dma_pages) * 
sizeof(long),
- sizeof(long));
-
-   dma_bitmap = phys_to_virt(bitmap_phys);
-   memset(dma_bitmap, 0, dma_pages * PAGE_SIZE);
+   dma_bitmap = memblock_alloc(BITS_TO_LONGS(dma_pages) * sizeof(long),
+   sizeof(long));
 }
 
 static void c6x_dma_sync(struct device *dev, phys_addr_t paddr, size_t size,
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index 131104b..9f19be8 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -80,8 +80,7 @@ static void __init map_ram(void)
}
 
/* Alloc one page for holding PTE's... */
-   pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
-   memset(pte, 0, PAGE_SIZE);
+   pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pmd(pme, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE));
 
/* Fill the newly allocated page with PTE'S */
@@ -113,8 +112,7 @@ static void __init fixedrange_init(void)
pgd = swapper_pg_dir + pgd_index(vaddr);
pud = pud_offset(pgd, vaddr);
pmd = pmd_offset(pud, vaddr);
-   fixmap_pmd_p = (pmd_t *) __va(memblock_phys_alloc(PAGE_SIZE, 
PAGE_SIZE));
-   memset(fixmap_pmd_p, 0, PAGE_SIZE);
+   fixmap_pmd_p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pmd(pmd, __pmd(__pa(fixmap_pmd_p) + _PAGE_KERNEL_TABLE));
 
 #ifdef CONFIG_HIGHMEM
@@ -126,8 +124,7 @@ static void __init fixedrange_init(void)
pgd = swapper_pg_dir + pgd_index(vaddr);
pud = pud_offset(pgd, vaddr);
pmd = pmd_offset(pud, vaddr);
-   pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
-   memset(pte, 0, PAGE_SIZE);
+   pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pmd(pmd, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE));
pkmap_page_table = pte;
 #endif /* CONFIG_HIGHMEM */
@@ -152,8 +149,7 @@ void __init paging_init(void)
fixedrange_init();
 
/* allocate space for empty_zero_page */
-   zero_page = __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
-   memset(zero_page, 0, PAGE_SIZE);
+   zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
zone_sizes_init();
 
empty_zero_page = virt_to_page(zero_page);
diff --git a/arch/powerpc/kernel/setup-common.c 
b/arch/powerpc/kernel/setup-common.c
index 93ee370..8f6c763 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -459,8 +459,8 @@ void __init smp_setup_cpu_maps(void)
 
DBG("smp_setup_cpu_maps()\n");
 
-   cpu_to_phys_id = __va(memblock_phys_alloc(nr_cpu_ids * sizeof(u32), 
__alignof__(u32)));
-   memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
+   cpu_to_phys_id = memblock_alloc(nr_cpu_ids * 

[PATCH v3 4/6] openrisc: simplify pte_alloc_one_kernel()

2018-12-09 Thread Mike Rapoport
The pte_alloc_one_kernel() function allocates a page using
__get_free_page(GFP_KERNEL) when mm initialization is complete and
memblock_phys_alloc() in the earlier stages. The physical address of the
page allocated with memblock_phys_alloc() is converted to the virtual
address and in both cases the allocated page is cleared using
clear_page().

The code is simplified by replacing __get_free_page() with
get_zeroed_page() and by replacing memblock_phys_alloc() with
memblock_alloc().

Signed-off-by: Mike Rapoport 
Acked-by: Stafford Horne 
---
 arch/openrisc/mm/ioremap.c | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index c969752..cfef989 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -123,13 +123,10 @@ pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
 {
pte_t *pte;
 
-   if (likely(mem_init_done)) {
-   pte = (pte_t *) __get_free_page(GFP_KERNEL);
-   } else {
-   pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
-   }
+   if (likely(mem_init_done))
+   pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
+   else
+   pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 
-   if (pte)
-   clear_page(pte);
return pte;
 }
-- 
2.7.4



[PATCH v3 3/6] sh: prefer memblock APIs returning virtual address

2018-12-09 Thread Mike Rapoport
Rather than use the memblock_alloc_base that returns a physical address and
then convert this address to the virtual one, use appropriate memblock
function that returns a virtual address.

There is a small functional change in the allocation of the NODE_DATA().
Instead of panicking if the local allocation fails, a non-local
allocation attempt will be made.

Signed-off-by: Mike Rapoport 
---
 arch/sh/mm/init.c | 18 +-
 arch/sh/mm/numa.c |  5 ++---
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index c8c13c77..3576b5f 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -192,24 +192,16 @@ void __init page_table_range_init(unsigned long start, 
unsigned long end,
 void __init allocate_pgdat(unsigned int nid)
 {
unsigned long start_pfn, end_pfn;
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-   unsigned long phys;
-#endif
 
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-   phys = __memblock_alloc_base(sizeof(struct pglist_data),
-   SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT);
-   /* Retry with all of system memory */
-   if (!phys)
-   phys = __memblock_alloc_base(sizeof(struct pglist_data),
-   SMP_CACHE_BYTES, 
memblock_end_of_DRAM());
-   if (!phys)
+   NODE_DATA(nid) = memblock_alloc_try_nid_nopanic(
+   sizeof(struct pglist_data),
+   SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT,
+   MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+   if (!NODE_DATA(nid))
panic("Can't allocate pgdat for node %d\n", nid);
-
-   NODE_DATA(nid) = __va(phys);
-   memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
 #endif
 
NODE_DATA(nid)->node_start_pfn = start_pfn;
diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c
index 830e8b3..c4bde61 100644
--- a/arch/sh/mm/numa.c
+++ b/arch/sh/mm/numa.c
@@ -41,9 +41,8 @@ void __init setup_bootmem_node(int nid, unsigned long start, 
unsigned long end)
__add_active_range(nid, start_pfn, end_pfn);
 
/* Node-local pgdat */
-   NODE_DATA(nid) = __va(memblock_alloc_base(sizeof(struct pglist_data),
-SMP_CACHE_BYTES, end));
-   memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+   NODE_DATA(nid) = memblock_alloc_node(sizeof(struct pglist_data),
+SMP_CACHE_BYTES, nid);
 
NODE_DATA(nid)->node_start_pfn = start_pfn;
NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
-- 
2.7.4



[PATCH v3 2/6] microblaze: prefer memblock API returning virtual address

2018-12-09 Thread Mike Rapoport
Rather than use the memblock_alloc_base that returns a physical address and
then convert this address to the virtual one, use appropriate memblock
function that returns a virtual address.

Signed-off-by: Mike Rapoport 
Tested-by: Michal Simek 
---
 arch/microblaze/mm/init.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index b17fd8a..44f4b89 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -363,8 +363,9 @@ void __init *early_get_page(void)
 * Mem start + kernel_tlb -> here is limit
 * because of mem mapping from head.S
 */
-   return __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE,
-   memory_start + kernel_tlb));
+   return memblock_alloc_try_nid_raw(PAGE_SIZE, PAGE_SIZE,
+   MEMBLOCK_LOW_LIMIT, memory_start + kernel_tlb,
+   NUMA_NO_NODE);
 }
 
 #endif /* CONFIG_MMU */
-- 
2.7.4



[PATCH v3 1/6] powerpc: prefer memblock APIs returning virtual address

2018-12-09 Thread Mike Rapoport
There are several places that allocate memory using memblock APIs that
return a physical address, convert the returned address to the virtual
address and frequently also memset(0) the allocated range.

Update these places to use memblock allocators already returning a virtual
address. Use memblock functions that clear the allocated memory instead of
calling memset(0) where appropriate.

The calls to memblock_alloc_base() that were not followed by memset(0) are
replaced with memblock_alloc_try_nid_raw(). Since the latter does not
panic() when the allocation fails, the appropriate panic() calls are added
to the call sites.

Signed-off-by: Mike Rapoport 
---
 arch/powerpc/kernel/paca.c | 16 ++--
 arch/powerpc/kernel/setup_64.c | 24 ++--
 arch/powerpc/mm/hash_utils_64.c|  6 +++---
 arch/powerpc/mm/pgtable-book3e.c   |  8 ++--
 arch/powerpc/mm/pgtable-book3s64.c |  5 +
 arch/powerpc/mm/pgtable-radix.c| 25 +++--
 arch/powerpc/platforms/pasemi/iommu.c  |  5 +++--
 arch/powerpc/platforms/pseries/setup.c | 18 ++
 arch/powerpc/sysdev/dart_iommu.c   |  7 +--
 9 files changed, 51 insertions(+), 63 deletions(-)

diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 913bfca..276d36d4 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -27,7 +27,7 @@
 static void *__init alloc_paca_data(unsigned long size, unsigned long align,
unsigned long limit, int cpu)
 {
-   unsigned long pa;
+   void *ptr;
int nid;
 
/*
@@ -42,17 +42,15 @@ static void *__init alloc_paca_data(unsigned long size, 
unsigned long align,
nid = early_cpu_to_node(cpu);
}
 
-   pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE);
-   if (!pa) {
-   pa = memblock_alloc_base(size, align, limit);
-   if (!pa)
-   panic("cannot allocate paca data");
-   }
+   ptr = memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
+limit, nid);
+   if (!ptr)
+   panic("cannot allocate paca data");
 
if (cpu == boot_cpuid)
memblock_set_bottom_up(false);
 
-   return __va(pa);
+   return ptr;
 }
 
 #ifdef CONFIG_PPC_PSERIES
@@ -118,7 +116,6 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, 
unsigned long limit)
}
 
s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
-   memset(s, 0, sizeof(*s));
 
s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
s->buffer_length = cpu_to_be32(sizeof(*s));
@@ -222,7 +219,6 @@ void __init allocate_paca(int cpu)
paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
limit, cpu);
paca_ptrs[cpu] = paca;
-   memset(paca, 0, sizeof(struct paca_struct));
 
initialise_paca(paca, cpu);
 #ifdef CONFIG_PPC_PSERIES
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 236c115..3dcd779 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -634,19 +634,17 @@ __init u64 ppc64_bolted_size(void)
 
 static void *__init alloc_stack(unsigned long limit, int cpu)
 {
-   unsigned long pa;
+   void *ptr;
 
BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
 
-   pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
-   early_cpu_to_node(cpu), MEMBLOCK_NONE);
-   if (!pa) {
-   pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
-   if (!pa)
-   panic("cannot allocate stacks");
-   }
+   ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_SIZE,
+MEMBLOCK_LOW_LIMIT, limit,
+early_cpu_to_node(cpu));
+   if (!ptr)
+   panic("cannot allocate stacks");
 
-   return __va(pa);
+   return ptr;
 }
 
 void __init irqstack_early_init(void)
@@ -739,20 +737,17 @@ void __init emergency_stack_init(void)
struct thread_info *ti;
 
ti = alloc_stack(limit, i);
-   memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
ti = alloc_stack(limit, i);
-   memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
 
/* emergency stack for machine check exception handling. */
ti = alloc_stack(limit, i);
-   memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
   

[PATCH v3 0/6] memblock: simplify several early memory allocation

2018-12-09 Thread Mike Rapoport
Hi,

These patches simplify some of the early memory allocations by replacing
usage of older memblock APIs with newer and shinier ones.

Quite a few places in the arch/ code allocated memory using a memblock API
that returns a physical address of the allocated area, then converted this
physical address to a virtual one and then used memset(0) to clear the
allocated range.

More recent memblock APIs do all the three steps in one call and their
usage simplifies the code.

It's important to note that regardless of API used, the core allocation is
nearly identical for any set of memblock allocators: first it tries to find
free memory with all the constraints specified by the caller and then
falls back to the allocation with some or all constraints disabled.

The first three patches perform the conversion of call sites that have
exact requirements for the node and the possible memory range.

The fourth patch is a bit one-off as it simplifies openrisc's
implementation of pte_alloc_one_kernel(), and not only the memblock usage.

The fifth patch takes care of simpler cases when the allocation can be
satisfied with a simple call to memblock_alloc().

The sixth patch removes one-liner wrappers for memblock_alloc on arm and
unicore32, as suggested by Christoph.

v3:
* added Tested-by from Michal Simek for microblaze changes
* updated powerpc changes as per Michael Ellerman comments:
  - use allocations that clear memory in alloc_paca_data() and alloc_stack()
  - ensure the replacement is equivalent to old API

v2:
* added Ack from Stafford Horne for openrisc changes
* entirely drop early_alloc wrappers on arm and unicore32, as per Christoph
Hellwig



Mike Rapoport (6):
  powerpc: prefer memblock APIs returning virtual address
  microblaze: prefer memblock API returning virtual address
  sh: prefer memblock APIs returning virtual address
  openrisc: simplify pte_alloc_one_kernel()
  arch: simplify several early memory allocations
  arm, unicore32: remove early_alloc*() wrappers

 arch/arm/mm/mmu.c  | 13 +++--
 arch/c6x/mm/dma-coherent.c |  9 ++---
 arch/microblaze/mm/init.c  |  5 +++--
 arch/nds32/mm/init.c   | 12 
 arch/openrisc/mm/ioremap.c | 11 ---
 arch/powerpc/kernel/paca.c | 16 ++--
 arch/powerpc/kernel/setup-common.c |  4 ++--
 arch/powerpc/kernel/setup_64.c | 24 ++--
 arch/powerpc/mm/hash_utils_64.c|  6 +++---
 arch/powerpc/mm/pgtable-book3e.c   |  8 ++--
 arch/powerpc/mm/pgtable-book3s64.c |  5 +
 arch/powerpc/mm/pgtable-radix.c| 25 +++--
 arch/powerpc/mm/pgtable_32.c   |  4 +---
 arch/powerpc/mm/ppc_mmu_32.c   |  3 +--
 arch/powerpc/platforms/pasemi/iommu.c  |  5 +++--
 arch/powerpc/platforms/powernv/opal.c  |  3 +--
 arch/powerpc/platforms/pseries/setup.c | 18 ++
 arch/powerpc/sysdev/dart_iommu.c   |  7 +--
 arch/sh/mm/init.c  | 18 +-
 arch/sh/mm/numa.c  |  5 ++---
 arch/sparc/kernel/prom_64.c|  7 ++-
 arch/sparc/mm/init_64.c|  9 +++--
 arch/unicore32/mm/mmu.c| 14 --
 23 files changed, 88 insertions(+), 143 deletions(-)

-- 
2.7.4



Re: use generic DMA mapping code in powerpc V4

2018-12-09 Thread Christian Zigotzky
Next step: 602307b034734ce77a05da4b99333a2eaf6b6482 (powerpc/fsl_pci: 
simplify fsl_pci_dma_set_mask)


git checkout 602307b034734ce77a05da4b99333a2eaf6b6482

The PASEMI onboard ethernet works and the X5000 boots.

-- Christian


On 08 December 2018 at 2:47PM, Christian Zigotzky wrote:
Next step: e15cd8173ef85e9cc3e2a9c7cc2982f5c1355615 (powerpc/dma: fix 
an off-by-one in dma_capable)


git checkout e15cd8173ef85e9cc3e2a9c7cc2982f5c1355615

The PASEMI onboard ethernet also works with this commit and the X5000 
boots without any problems.


-- Christian


On 08 December 2018 at 11:29AM, Christian Zigotzky wrote:
Next step: 7ebc44c535f6bd726d553756d38b137acc718443 (powerpc/dma: 
remove max_direct_dma_addr)


git checkout 7ebc44c535f6bd726d553756d38b137acc718443

OK, the PASEMI onboard ethernet works and the P5020 board boots.

-- Christian


On 07 December 2018 at 7:33PM, Christian Zigotzky wrote:
Next step: 13c1fdec5682b6e13257277fa16aa31f342d167d (powerpc/dma: 
move pci_dma_dev_setup_swiotlb to fsl_pci.c)


git checkout 13c1fdec5682b6e13257277fa16aa31f342d167d

Result: The PASEMI onboard ethernet works and the P5020 board boots.

— Christian











Re: [PATCH v3 06/12] arm: perf/core: use PERF_PMU_CAP_NO_EXCLUDE for exclude incapable PMUs

2018-12-09 Thread Shawn Guo
On Thu, Dec 06, 2018 at 04:47:23PM +, Andrew Murray wrote:
> For drivers that do not support context exclusion let's advertise the
> PERF_PMU_CAP_NO_EXCLUDE capability. This ensures that perf will
> prevent us from handling events where any exclusion flags are set.
> Let's also remove the now unnecessary check for exclusion flags.
> 
> Signed-off-by: Andrew Murray 
> ---
>  arch/arm/mach-imx/mmdc.c | 9 ++---

For imx mmdc changes:

Acked-by: Shawn Guo 


Re: [PATCH 12/34] powerpc/cell: move dma direct window setup out of dma_configure

2018-12-09 Thread Michael Ellerman
Christoph Hellwig  writes:

> Configure the dma settings at device setup time, and stop playing games
> with get_pci_dma_ops.  This prepares for using the common dma_configure
> code later on.
>
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/powerpc/platforms/cell/iommu.c | 20 +++-
>  1 file changed, 11 insertions(+), 9 deletions(-)

This one's crashing, haven't dug into why yet:

  [1.347085] Unable to handle kernel paging request for data at address 
0x0040
  [1.391505] Faulting instruction address: 0xc06b6e6c
  cpu 0x0: Vector: 380 (Data SLB Access) at [c007fc9032d0]
  pc: c06b6e6c: .of_n_addr_cells+0x34/0xc0
  lr: c0070b30: .cell_iommu_get_fixed_address+0x58/0x2b0
  sp: c007fc903560
  msr: 90009032
  dar: 40
  current = 0xc007fc8d
  paca= 0xc0f6   irqmask: 0x03   irq_happened: 0x01
  pid   = 1, comm = swapper/0
  Linux version 4.20.0-rc2-gcc7x-g1e32f48 (kerkins@p82) (gcc version 7.4.1 
20181208 (Custom eb377405ab2d1900)) #1 SMP Sun Dec 9 12:16:48 AEDT 2018
  enter ? for help
  [c007fc9035f0] c0070b30 .cell_iommu_get_fixed_address+0x58/0x2b0
  [c007fc9036c0] c00711ac .cell_dma_dev_setup.part.1+0x24/0x118
  [c007fc903740] c0071374 .cell_of_bus_notify+0x6c/0xbc
  [c007fc9037c0] c00e7ef0 .notifier_call_chain+0x90/0xf8
  [c007fc903860] c00e8c2c .blocking_notifier_call_chain+0x84/0xb8
  [c007fc9038f0] c0597544 .device_add+0x584/0x7b8
  [c007fc9039c0] c05a0308 .platform_device_add+0x148/0x2f0
  [c007fc903a60] c05a1508 .platform_device_register_full+0x148/0x168
  [c007fc903ae0] c0a9a8a0 
.__machine_initcall_cell_cell_publish_devices+0x1bc/0x210
  [c007fc903be0] c000eca4 .do_one_initcall+0x64/0x2d8
  [c007fc903cc0] c0a844ec .kernel_init_freeable+0x3dc/0x4e4
  [c007fc903da0] c000f06c .kernel_init+0x24/0x150
  [c007fc903e20] c000a9c0 .ret_from_kernel_thread+0x58/0x78

cheers

> diff --git a/arch/powerpc/platforms/cell/iommu.c 
> b/arch/powerpc/platforms/cell/iommu.c
> index 12352a58072a..cce5bf9515e5 100644
> --- a/arch/powerpc/platforms/cell/iommu.c
> +++ b/arch/powerpc/platforms/cell/iommu.c
> @@ -657,14 +657,21 @@ static const struct dma_map_ops dma_iommu_fixed_ops = {
>   .mapping_error  = dma_iommu_mapping_error,
>  };
>  
> +static u64 cell_iommu_get_fixed_address(struct device *dev);
> +
>  static void cell_dma_dev_setup(struct device *dev)
>  {
> - if (get_pci_dma_ops() == _iommu_ops)
> + if (get_pci_dma_ops() == _iommu_ops) {
> + u64 addr = cell_iommu_get_fixed_address(dev);
> +
> + if (addr != OF_BAD_ADDR)
> + set_dma_offset(dev, addr + dma_iommu_fixed_base);
>   set_iommu_table_base(dev, cell_get_iommu_table(dev));
> - else if (get_pci_dma_ops() == _nommu_ops)
> + } else if (get_pci_dma_ops() == _nommu_ops) {
>   set_dma_offset(dev, cell_dma_nommu_offset);
> - else
> + } else {
>   BUG();
> + }
>  }
>  
>  static void cell_pci_dma_dev_setup(struct pci_dev *dev)
> @@ -950,19 +957,14 @@ static int dma_suported_and_switch(struct device *dev, 
> u64 dma_mask)
>  {
>   if (dma_mask == DMA_BIT_MASK(64) &&
>   cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR) {
> - u64 addr = cell_iommu_get_fixed_address(dev) +
> - dma_iommu_fixed_base;
>   dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
> - dev_dbg(dev, "iommu: fixed addr = %llx\n", addr);
>   set_dma_ops(dev, _iommu_fixed_ops);
> - set_dma_offset(dev, addr);
>   return 1;
>   }
>  
>   if (dma_iommu_dma_supported(dev, dma_mask)) {
>   dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
> - set_dma_ops(dev, get_pci_dma_ops());
> - cell_dma_dev_setup(dev);
> + set_dma_ops(dev, _iommu_ops);
>   return 1;
>   }
>  
> -- 
> 2.19.1


[PATCH 2/2] powerpc/perf: Update perf_regs structure to include MMCRA

2018-12-09 Thread Madhavan Srinivasan
On each sample, Monitor Mode Control Register A (MMCRA) content
is saved in pt_regs. MMCRA does not have an entry as-is in the pt_regs
but instead, MMCRA content is saved in the "dsisr" register of pt_regs.

Patch adds another entry to the perf_regs structure to include the "MMCRA"
printing which internally maps to the "dsisr" of pt_regs.

It also checks for MMCRA availability on the platform and presents the
value accordingly.

Signed-off-by: Madhavan Srinivasan 
---
 arch/powerpc/include/uapi/asm/perf_regs.h   | 1 +
 arch/powerpc/perf/perf_regs.c   | 6 ++
 tools/arch/powerpc/include/uapi/asm/perf_regs.h | 1 +
 tools/perf/arch/powerpc/include/perf_regs.h | 3 ++-
 tools/perf/arch/powerpc/util/perf_regs.c| 1 +
 5 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
index ff91192407d1..f599064dd8dc 100644
--- a/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -47,6 +47,7 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DAR,
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
+   PERF_REG_POWERPC_MMCRA,
PERF_REG_POWERPC_MAX,
 };
 #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index 5c36b3a8d47a..3349f3f8fe84 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -70,6 +70,7 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
 };
 
 u64 perf_reg_value(struct pt_regs *regs, int idx)
@@ -83,6 +84,11 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
!is_sier_available()))
return 0;
 
+   if (idx == PERF_REG_POWERPC_MMCRA &&
+  (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+   IS_ENABLED(CONFIG_PPC32)))
+   return 0;
+
return regs_get_register(regs, pt_regs_offset[idx]);
 }
 
diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h 
b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
index ff91192407d1..f599064dd8dc 100644
--- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -47,6 +47,7 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DAR,
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
+   PERF_REG_POWERPC_MMCRA,
PERF_REG_POWERPC_MAX,
 };
 #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
index 1076393e6f43..e18a3556f5e3 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -63,7 +63,8 @@ static const char *reg_names[] = {
[PERF_REG_POWERPC_TRAP] = "trap",
[PERF_REG_POWERPC_DAR] = "dar",
[PERF_REG_POWERPC_DSISR] = "dsisr",
-   [PERF_REG_POWERPC_SIER] = "sier"
+   [PERF_REG_POWERPC_SIER] = "sier",
+   [PERF_REG_POWERPC_MMCRA] = "mmcra"
 };
 
 static inline const char *perf_reg_name(int id)
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
b/tools/perf/arch/powerpc/util/perf_regs.c
index 07fcd977d93e..34d5134681d9 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -53,6 +53,7 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(dar, PERF_REG_POWERPC_DAR),
SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
SMPL_REG(sier, PERF_REG_POWERPC_SIER),
+   SMPL_REG(mmcra, PERF_REG_POWERPC_MMCRA),
SMPL_REG_END
 };
 
-- 
2.7.4



[PATCH v2 1/2] powerpc/perf: Update perf_regs structure to include SIER

2018-12-09 Thread Madhavan Srinivasan
On each sample, Sample Instruction Event Register (SIER) content
is saved in pt_regs. SIER does not have an entry as-is in the pt_regs
but instead, SIER content is saved in the "dar" register of pt_regs.

Patch adds another entry to the perf_regs structure to include the "SIER"
printing which internally maps to the "dar" of pt_regs.

It also checks for SIER availability on the platform and presents the
value accordingly.

Signed-off-by: Madhavan Srinivasan 
---
Changelog v1:
- Added checks to address other powerpc platform when
  updating SIER register value

 arch/powerpc/include/asm/perf_event.h   | 3 +++
 arch/powerpc/include/uapi/asm/perf_regs.h   | 1 +
 arch/powerpc/perf/core-book3s.c | 8 
 arch/powerpc/perf/perf_regs.c   | 7 +++
 tools/arch/powerpc/include/uapi/asm/perf_regs.h | 1 +
 tools/perf/arch/powerpc/include/perf_regs.h | 3 ++-
 tools/perf/arch/powerpc/util/perf_regs.c| 1 +
 7 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/perf_event.h 
b/arch/powerpc/include/asm/perf_event.h
index 8bf1b6351716..660f65b98bfb 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -37,4 +37,7 @@
(regs)->gpr[1] = current_stack_pointer();   \
asm volatile("mfmsr %0" : "=r" ((regs)->msr));  \
} while (0)
+
+/* To support perf_regs sier update */
+extern bool is_sier_available(void);
 #endif
diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
index 9e52c86ccbd3..ff91192407d1 100644
--- a/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -46,6 +46,7 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_TRAP,
PERF_REG_POWERPC_DAR,
PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MAX,
 };
 #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 81f8a0c838ae..b4976cae1005 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -130,6 +130,14 @@ static inline void power_pmu_bhrb_read(struct 
cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
 
+bool is_sier_available(void)
+{
+   if (ppmu->flags & PPMU_HAS_SIER)
+   return true;
+
+   return false;
+}
+
 static bool regs_use_siar(struct pt_regs *regs)
 {
/*
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index 09ceea6175ba..5c36b3a8d47a 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -69,6 +69,7 @@ static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar),
 };
 
 u64 perf_reg_value(struct pt_regs *regs, int idx)
@@ -76,6 +77,12 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
return 0;
 
+   if (idx == PERF_REG_POWERPC_SIER &&
+  (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+   IS_ENABLED(CONFIG_PPC32) ||
+   !is_sier_available()))
+   return 0;
+
return regs_get_register(regs, pt_regs_offset[idx]);
 }
 
diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h 
b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
index 9e52c86ccbd3..ff91192407d1 100644
--- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -46,6 +46,7 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_TRAP,
PERF_REG_POWERPC_DAR,
PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MAX,
 };
 #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
index 00e37b106913..1076393e6f43 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -62,7 +62,8 @@ static const char *reg_names[] = {
[PERF_REG_POWERPC_SOFTE] = "softe",
[PERF_REG_POWERPC_TRAP] = "trap",
[PERF_REG_POWERPC_DAR] = "dar",
-   [PERF_REG_POWERPC_DSISR] = "dsisr"
+   [PERF_REG_POWERPC_DSISR] = "dsisr",
+   [PERF_REG_POWERPC_SIER] = "sier"
 };
 
 static inline const char *perf_reg_name(int id)
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
b/tools/perf/arch/powerpc/util/perf_regs.c
index ec50939b0418..07fcd977d93e 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -52,6 +52,7 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
SMPL_REG(dar, 

[PATCH] powerpc/perf: Fix thersholding counter data for unknown type

2018-12-09 Thread Madhavan Srinivasan
MMCRA[34:36] and MMCRA[38:44] expose the thresholding counter value.
Thresholding counter can be used to count latency cycles such as
load miss to reload. But threshold counter value is not relevant
when the sampled instruction type is unknown or reserved. This patch
forces the thresholding counter value to zero when the sampled instruction
type is unknown or reserved.

Fixes: 170a315f41c6('powerpc/perf: Support to export MMCRA[TEC*] field to 
userspace')
Signed-off-by: Madhavan Srinivasan 
---
 arch/powerpc/perf/isa207-common.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index 177de814286f..6a2f65d3d088 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -226,8 +226,13 @@ void isa207_get_mem_weight(u64 *weight)
u64 mmcra = mfspr(SPRN_MMCRA);
u64 exp = MMCRA_THR_CTR_EXP(mmcra);
u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
+   u64 sier = mfspr(SPRN_SIER);
+   u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
 
-   *weight = mantissa << (2 * exp);
+   if (val == 0 || val == 7)
+   *weight = 0;
+   else
+   *weight = mantissa << (2 * exp);
 }
 
 int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
-- 
2.7.4