[PATCH v4] trace/kprobe: Display the actual notrace function when rejecting a probe

2023-12-13 Thread Naveen N Rao
Trying to probe update_sd_lb_stats() using perf results in the below
message in the kernel log:
trace_kprobe: Could not probe notrace function _text

This is because 'perf probe' specifies the kprobe location as an offset
from '_text':
$ sudo perf probe -D update_sd_lb_stats
p:probe/update_sd_lb_stats _text+1830728

However, the error message is misleading and doesn't help convey the
actual notrace function that is being probed. Fix this by looking up the
actual function name that is being probed. With this fix, we now get the
below message in the kernel log:
trace_kprobe: Could not probe notrace function update_sd_lb_stats.constprop.0

Signed-off-by: Naveen N Rao 
---
v4: Use the printk format specifier %ps with the probe address to look up the
symbol, as suggested by Masami.
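
For reference, a minimal sketch (not part of the patch) of why %ps helps
here: printk resolves the address through kallsyms at print time, so we
report the function containing the probe address rather than the symbol
the user specified:

        unsigned long addr = trace_kprobe_address(tk);

        /* symbol as specified by the user: prints "_text" */
        pr_warn("... %s\n", trace_kprobe_symbol(tk));
        /* address resolved via kallsyms: prints "update_sd_lb_stats.constprop.0" */
        pr_warn("... %ps\n", (void *)addr);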

 kernel/trace/trace_kprobe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 3d7a180a8427..0017404d6e8d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -487,8 +487,8 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
return -EINVAL;
 
if (within_notrace_func(tk)) {
-   pr_warn("Could not probe notrace function %s\n",
-   trace_kprobe_symbol(tk));
+   pr_warn("Could not probe notrace function %ps\n",
+   (void *)trace_kprobe_address(tk));
return -EINVAL;
}
 

base-commit: 4758560fa268cecfa1144f015aa9f2525d164b7e
-- 
2.43.0




Re: [PATCH v3] trace/kprobe: Display the actual notrace function when rejecting a probe

2023-12-13 Thread Naveen N Rao
On Thu, Dec 14, 2023 at 08:02:10AM +0900, Masami Hiramatsu wrote:
> On Wed, 13 Dec 2023 20:09:14 +0530
> Naveen N Rao  wrote:
> 
> > Trying to probe update_sd_lb_stats() using perf results in the below
> > message in the kernel log:
> >   trace_kprobe: Could not probe notrace function _text
> > 
> > This is because 'perf probe' specifies the kprobe location as an offset
> > from '_text':
> >   $ sudo perf probe -D update_sd_lb_stats
> >   p:probe/update_sd_lb_stats _text+1830728
> > 
> > However, the error message is misleading and doesn't help convey the
> > actual notrace function that is being probed. Fix this by looking up the
> > actual function name that is being probed. With this fix, we now get the
> > below message in the kernel log:
> >   trace_kprobe: Could not probe notrace function update_sd_lb_stats.constprop.0
> > 
> > Signed-off-by: Naveen N Rao 
> > ---
> > v3: Remove tk parameter from within_notrace_func() as suggested by 
> > Masami
> > 
> >  kernel/trace/trace_kprobe.c | 11 ++-
> >  1 file changed, 6 insertions(+), 5 deletions(-)
> > 
> > diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> > index 3d7a180a8427..dc36c6ed6131 100644
> > --- a/kernel/trace/trace_kprobe.c
> > +++ b/kernel/trace/trace_kprobe.c
> > @@ -449,9 +449,8 @@ static bool __within_notrace_func(unsigned long addr)
> > return !ftrace_location_range(addr, addr + size - 1);
> >  }
> >  
> > -static bool within_notrace_func(struct trace_kprobe *tk)
> > +static bool within_notrace_func(unsigned long addr)
> >  {
> > -   unsigned long addr = trace_kprobe_address(tk);
> > char symname[KSYM_NAME_LEN], *p;
> >  
> > if (!__within_notrace_func(addr))
> > @@ -471,12 +470,14 @@ static bool within_notrace_func(struct trace_kprobe 
> > *tk)
> > return true;
> >  }
> >  #else
> > -#define within_notrace_func(tk)(false)
> > +#define within_notrace_func(addr)  (false)
> >  #endif
> >  
> >  /* Internal register function - just handle k*probes and flags */
> >  static int __register_trace_kprobe(struct trace_kprobe *tk)
> >  {
> > +   unsigned long addr = trace_kprobe_address(tk);
> > +   char symname[KSYM_NAME_LEN];
> > int i, ret;
> >  
> > ret = security_locked_down(LOCKDOWN_KPROBES);
> > @@ -486,9 +487,9 @@ static int __register_trace_kprobe(struct trace_kprobe 
> > *tk)
> > if (trace_kprobe_is_registered(tk))
> > return -EINVAL;
> >  
> > -   if (within_notrace_func(tk)) {
> > +   if (within_notrace_func(addr)) {
> > pr_warn("Could not probe notrace function %s\n",
> > -   trace_kprobe_symbol(tk));
> > +   lookup_symbol_name(addr, symname) ? 
> > trace_kprobe_symbol(tk) : symname);
> 
> Can we just use %ps and (void *)trace_kprobe_address(tk) here?
> 
> That will be simpler.

Indeed - that is much simpler. v4 on its way...

Thanks!
- Naveen



[PATCH v3] trace/kprobe: Display the actual notrace function when rejecting a probe

2023-12-13 Thread Naveen N Rao
Trying to probe update_sd_lb_stats() using perf results in the below
message in the kernel log:
  trace_kprobe: Could not probe notrace function _text

This is because 'perf probe' specifies the kprobe location as an offset
from '_text':
  $ sudo perf probe -D update_sd_lb_stats
  p:probe/update_sd_lb_stats _text+1830728

However, the error message is misleading and doesn't help convey the
actual notrace function that is being probed. Fix this by looking up the
actual function name that is being probed. With this fix, we now get the
below message in the kernel log:
  trace_kprobe: Could not probe notrace function update_sd_lb_stats.constprop.0

Signed-off-by: Naveen N Rao 
---
v3: Remove tk parameter from within_notrace_func() as suggested by 
Masami

 kernel/trace/trace_kprobe.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 3d7a180a8427..dc36c6ed6131 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -449,9 +449,8 @@ static bool __within_notrace_func(unsigned long addr)
return !ftrace_location_range(addr, addr + size - 1);
 }
 
-static bool within_notrace_func(struct trace_kprobe *tk)
+static bool within_notrace_func(unsigned long addr)
 {
-   unsigned long addr = trace_kprobe_address(tk);
char symname[KSYM_NAME_LEN], *p;
 
if (!__within_notrace_func(addr))
@@ -471,12 +470,14 @@ static bool within_notrace_func(struct trace_kprobe *tk)
return true;
 }
 #else
-#define within_notrace_func(tk)(false)
+#define within_notrace_func(addr)  (false)
 #endif
 
 /* Internal register function - just handle k*probes and flags */
 static int __register_trace_kprobe(struct trace_kprobe *tk)
 {
+   unsigned long addr = trace_kprobe_address(tk);
+   char symname[KSYM_NAME_LEN];
int i, ret;
 
ret = security_locked_down(LOCKDOWN_KPROBES);
@@ -486,9 +487,9 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
if (trace_kprobe_is_registered(tk))
return -EINVAL;
 
-   if (within_notrace_func(tk)) {
+   if (within_notrace_func(addr)) {
pr_warn("Could not probe notrace function %s\n",
-   trace_kprobe_symbol(tk));
+   lookup_symbol_name(addr, symname) ? 
trace_kprobe_symbol(tk) : symname);
return -EINVAL;
}
 

base-commit: 4758560fa268cecfa1144f015aa9f2525d164b7e
-- 
2.43.0




Re: [PATCH v1 2/2] powerpc: Enable OPTPROBES on PPC32

2021-04-20 Thread Naveen N. Rao

Christophe Leroy wrote:

For that, create a 32 bits version of patch_imm64_load_insns()
and create a patch_imm_load_insns() which calls
patch_imm32_load_insns() on PPC32 and patch_imm64_load_insns()
on PPC64.

Adapt optprobes_head.S for PPC32. Use PPC_LL/PPC_STL macros instead
of raw ld/std, opt out things linked to paca and use stmw/lmw to
save/restore registers.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/Kconfig |  2 +-
 arch/powerpc/kernel/optprobes.c  | 24 +--
 arch/powerpc/kernel/optprobes_head.S | 46 +++-
 3 files changed, 53 insertions(+), 19 deletions(-)


Thanks for adding support for ppc32. It is good to see that it works 
without too many changes.
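
As a concrete illustration (mine, untested -- just restating what the two
helpers generate), loading a 32-bit immediate on ppc32 takes two
instructions versus the five needed for a 64-bit immediate on ppc64:

        /*
         * patch_imm32_load_insns(0x12345678, 3, addr) emits:
         *
         *      lis  r3, 0x1234         # IMM_H(val): upper 16 bits
         *      ori  r3, r3, 0x5678     # IMM_L(val): lower 16 bits
         *
         * while patch_imm64_load_insns() needs the full lis/ori/rldicr/oris/ori
         * sequence to materialize a 64-bit value.
         */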




diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c1344c05226c..49b538e54efb 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -227,7 +227,7 @@ config PPC
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI if PERF_EVENTS || (PPC64 && 
PPC_BOOK3S)
select HAVE_HARDLOCKUP_DETECTOR_ARCHif PPC64 && PPC_BOOK3S && SMP
-   select HAVE_OPTPROBES   if PPC64
+   select HAVE_OPTPROBES
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI if PPC64
select HAVE_HARDLOCKUP_DETECTOR_PERFif PERF_EVENTS && HAVE_PERF_EVENTS_NMI 
&& !HAVE_HARDLOCKUP_DETECTOR_ARCH
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 58fdb9f66e0f..cdf87086fa33 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -141,11 +141,21 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe 
*op)
}
 }

+static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t 
*addr)
+{
+   patch_instruction((struct ppc_inst *)addr,
+ ppc_inst(PPC_RAW_LIS(reg, IMM_H(val))));
+   addr++;
+
+   patch_instruction((struct ppc_inst *)addr,
+ ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val))));
+}
+
 /*
  * Generate instructions to load provided immediate 64-bit value
  * to register 'reg' and patch these instructions at 'addr'.
  */
-static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t 
*addr)
+static void patch_imm64_load_insns(unsigned long long val, int reg, 
kprobe_opcode_t *addr)


Do you really need this?


 {
/* lis reg,(op)@highest */
patch_instruction((struct ppc_inst *)addr,
@@ -177,6 +187,14 @@ static void patch_imm64_load_insns(unsigned long val, int 
reg, kprobe_opcode_t *
   ___PPC_RS(reg) | (val & 0xffff)));
 }

+static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t 
*addr)
+{
+   if (IS_ENABLED(CONFIG_PPC64))
+   patch_imm64_load_insns(val, reg, addr);
+   else
+   patch_imm32_load_insns(val, reg, addr);
+}
+
 int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe 
*p)
 {
struct ppc_inst branch_op_callback, branch_emulate_step, temp;
@@ -230,7 +248,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe 
*op, struct kprobe *p)
 * Fixup the template with instructions to:
 * 1. load the address of the actual probepoint
 */
-   patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);
+   patch_imm_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);

/*
 * 2. branch to optimized_callback() and emulate_step()
@@ -264,7 +282,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe 
*op, struct kprobe *p)
 * 3. load instruction to be emulated into relevant register, and
 */
temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
-   patch_imm64_load_insns(ppc_inst_as_ulong(temp), 4, buff + 
TMPL_INSN_IDX);
+   patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);

/*
 * 4. branch back from trampoline
diff --git a/arch/powerpc/kernel/optprobes_head.S 
b/arch/powerpc/kernel/optprobes_head.S
index ff8ba4d3824d..49f31e554573 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -30,39 +30,47 @@ optinsn_slot:
.global optprobe_template_entry
 optprobe_template_entry:
/* Create an in-memory pt_regs */
-   stdur1,-INT_FRAME_SIZE(r1)
+   PPC_STLUr1,-INT_FRAME_SIZE(r1)
SAVE_GPR(0,r1)
/* Save the previous SP into stack */
addir0,r1,INT_FRAME_SIZE
-   std r0,GPR1(r1)
+   PPC_STL r0,GPR1(r1)
+#ifdef CONFIG_PPC64
SAVE_10GPRS(2,r1)
SAVE_10GPRS(12,r1)
SAVE_10GPRS(22,r1)
+#else
+   stmwr2, GPR2(r1)
+#endif
/* Save SPRS */
mfmsr   r5
-   std r5,_MSR(r1)
+   PPC_STL r5,_MSR(r1)
li  r5,0x700
-   std r5,_TRAP(r1)
+   PPC_STL r5,_TRAP(r1)
li  r5,0
-   std r5,ORIG_GPR3(r1)

Re: [PATCH v4] powerpc/uprobes: Validation for prefixed instruction

2021-03-09 Thread Naveen N. Rao
On 2021/03/09 08:54PM, Michael Ellerman wrote:
> Ravi Bangoria  writes:
> > As per ISA 3.1, prefixed instruction should not cross 64-byte
> > boundary. So don't allow Uprobe on such prefixed instruction.
> >
> > There are two ways probed instruction is changed in mapped pages.
> > First, when Uprobe is activated, it searches for all the relevant
> > pages and replace instruction in them. In this case, if that probe
> > is on the 64-byte unaligned prefixed instruction, error out
> > directly. Second, when Uprobe is already active and user maps a
> > relevant page via mmap(), instruction is replaced via mmap() code
> > path. But because Uprobe is invalid, entire mmap() operation can
> > not be stopped. In this case just print an error and continue.
> >
> > Signed-off-by: Ravi Bangoria 
> > Acked-by: Naveen N. Rao 
> 
> Do we have a Fixes: tag for this?

Since this is an additional check we are adding, I don't think we should 
add a Fixes: tag. Nothing is broken per se -- we're just adding more 
checks to catch simple mistakes. Also, like Oleg pointed out, there are 
still many other ways for users to shoot themselves in the foot with 
uprobes and prefixed instructions, if they so desire.

However, if you still think we should add a Fixes: tag, we can perhaps 
use the below commit since I didn't see any specific commit adding 
support for prefixed instructions for uprobes:

Fixes: 650b55b707fdfa ("powerpc: Add prefixed instructions to 
instruction data type")

> 
> > ---
> > v3: 
> > https://lore.kernel.org/r/20210304050529.59391-1-ravi.bango...@linux.ibm.com
> > v3->v4:
> >   - CONFIG_PPC64 check was not required, remove it.
> >   - Use SZ_ macros instead of hardcoded numbers.
> >
> >  arch/powerpc/kernel/uprobes.c | 7 +++
> >  1 file changed, 7 insertions(+)
> >
> > diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
> > index e8a63713e655..4cbfff6e94a3 100644
> > --- a/arch/powerpc/kernel/uprobes.c
> > +++ b/arch/powerpc/kernel/uprobes.c
> > @@ -41,6 +41,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
> > if (addr & 0x03)
> > return -EINVAL;
> >  
> > +   if (cpu_has_feature(CPU_FTR_ARCH_31) &&
> > +   ppc_inst_prefixed(auprobe->insn) &&
> > +   (addr & (SZ_64 - 4)) == SZ_64 - 4) {
> > +   pr_info_ratelimited("Cannot register a uprobe on 64 byte 
> > unaligned prefixed instruction\n");
> > +   return -EINVAL;
> 
> I realise we already did the 0x03 check above, but I still think this
> would be clearer simply as:
> 
>   (addr & 0x3f == 60)

Indeed, I like the use of `60' there -- hex is overrated ;)

- Naveen


Re: [PATCH v3] powerpc/uprobes: Validation for prefixed instruction

2021-03-03 Thread Naveen N. Rao
On 2021/03/04 10:35AM, Ravi Bangoria wrote:
> As per ISA 3.1, prefixed instruction should not cross 64-byte
> boundary. So don't allow Uprobe on such prefixed instruction.
> 
> There are two ways probed instruction is changed in mapped pages.
> First, when Uprobe is activated, it searches for all the relevant
> pages and replace instruction in them. In this case, if that probe
> is on the 64-byte unaligned prefixed instruction, error out
> directly. Second, when Uprobe is already active and user maps a
> relevant page via mmap(), instruction is replaced via mmap() code
> path. But because Uprobe is invalid, entire mmap() operation can
> not be stopped. In this case just print an error and continue.
> 
> Signed-off-by: Ravi Bangoria 
> ---
> v2: 
> https://lore.kernel.org/r/20210204104703.273429-1-ravi.bango...@linux.ibm.com
> v2->v3:
>   - Drop restriction for Uprobe on suffix of prefixed instruction.
> It needs lot of code change including generic code but what
> we get in return is not worth it.
> 
>  arch/powerpc/kernel/uprobes.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
> index e8a63713e655..c400971ebe70 100644
> --- a/arch/powerpc/kernel/uprobes.c
> +++ b/arch/powerpc/kernel/uprobes.c
> @@ -41,6 +41,14 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
>   if (addr & 0x03)
>   return -EINVAL;
>  
> + if (!IS_ENABLED(CONFIG_PPC64) || !cpu_has_feature(CPU_FTR_ARCH_31))
> + return 0;

Sorry, I missed this last time, but I think we can drop the above 
checks. ppc_inst_prefixed() already factors in the dependency on 
CONFIG_PPC64, and I don't think we need to confirm if we're running on a 
ISA V3.1 for the below check.

With that:
Acked-by: Naveen N. Rao 

> +
> + if (ppc_inst_prefixed(auprobe->insn) && (addr & 0x3F) == 0x3C) {
> + pr_info_ratelimited("Cannot register a uprobe on 64 byte 
> unaligned prefixed instruction\n");
> + return -EINVAL;
> + }
> +

- Naveen


Re: [PATCH v2] powerpc/uprobes: Validation for prefixed instruction

2021-02-04 Thread Naveen N. Rao
On 2021/02/04 06:38PM, Naveen N. Rao wrote:
> On 2021/02/04 04:17PM, Ravi Bangoria wrote:
> > Don't allow Uprobe on 2nd word of a prefixed instruction. As per
> > ISA 3.1, prefixed instruction should not cross 64-byte boundary.
> > So don't allow Uprobe on such prefixed instruction as well.
> > 
> > There are two ways probed instruction is changed in mapped pages.
> > First, when Uprobe is activated, it searches for all the relevant
> > pages and replace instruction in them. In this case, if we notice
> > that probe is on the 2nd word of prefixed instruction, error out
> > directly. Second, when Uprobe is already active and user maps a
> > relevant page via mmap(), instruction is replaced via mmap() code
> > path. But because Uprobe is invalid, entire mmap() operation can
> > not be stopped. In this case just print an error and continue.
> > 
> > Signed-off-by: Ravi Bangoria 
> > ---
> > v1: 
> > http://lore.kernel.org/r/20210119091234.76317-1-ravi.bango...@linux.ibm.com
> > v1->v2:
> >   - Instead of introducing new arch hook from verify_opcode(), use
> > existing hook arch_uprobe_analyze_insn().
> >   - Add explicit check for prefixed instruction crossing 64-byte
> > boundary. If probe is on such instruction, throw an error.
> > 
> >  arch/powerpc/kernel/uprobes.c | 66 ++-
> >  1 file changed, 65 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
> > index e8a63713e655..485d19a2a31f 100644
> > --- a/arch/powerpc/kernel/uprobes.c
> > +++ b/arch/powerpc/kernel/uprobes.c
> > @@ -7,6 +7,7 @@
> >   * Adapted from the x86 port by Ananth N Mavinakayanahalli 
> > 
> >   */
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -28,6 +29,69 @@ bool is_trap_insn(uprobe_opcode_t *insn)
> > return (is_trap(*insn));
> >  }
> >  
> > +#ifdef CONFIG_PPC64
> > +static int get_instr(struct mm_struct *mm, unsigned long addr, u32 *instr)
> > +{
> > +   struct page *page;
> > +   struct vm_area_struct *vma;
> > +   void *kaddr;
> > +   unsigned int gup_flags = FOLL_FORCE | FOLL_SPLIT_PMD;
> > +
> > +   if (get_user_pages_remote(mm, addr, 1, gup_flags, &page, &vma, NULL) <= 
> > 0)
> > +   return -EINVAL;
> > +
> > +   kaddr = kmap_atomic(page);
> > +   *instr = *((u32 *)(kaddr + (addr & ~PAGE_MASK)));
> > +   kunmap_atomic(kaddr);
> > +   put_page(page);
> > +   return 0;
> > +}
> > +
> > +static int validate_prefixed_instr(struct mm_struct *mm, unsigned long 
> > addr)
> > +{
> > +   struct ppc_inst inst;
> > +   u32 prefix, suffix;
> > +
> > +   /*
> > +* No need to check if addr is pointing to beginning of the
> > +* page. Even if probe is on a suffix of page-unaligned
> > +* prefixed instruction, hw will raise exception and kernel
> > +* will send SIGBUS.
> > +*/
> > +   if (!(addr & ~PAGE_MASK))
> > +   return 0;
> > +
> > +   if (get_instr(mm, addr, &prefix) < 0)
> > +   return -EINVAL;
> > +   if (get_instr(mm, addr + 4, &suffix) < 0)
> > +   return -EINVAL;
> > +
> > +   inst = ppc_inst_prefix(prefix, suffix);
> > +   if (ppc_inst_prefixed(inst) && (addr & 0x3F) == 0x3C) {
> > +   printk_ratelimited("Cannot register a uprobe on 64 byte "
>   ^^ pr_info_ratelimited()
> 
> It should be sufficient to check the primary opcode to determine if it 
> is a prefixed instruction. You don't have to read the suffix. I see that 
> we don't have a helper to do this currently, so you could do:
> 
>   if (ppc_inst_primary_opcode(ppc_inst(prefix)) == 1)

Seeing the kprobes code, I realized that we have to check for another 
scenario (Thanks, Jordan!). If this is the suffix of a prefix 
instruction for which a uprobe has already been installed, then the 
previous word will be a 'trap' instruction. You need to check if there 
is a uprobe at the previous word, and if the original instruction there 
was a prefix instruction.
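
Something along these lines, as a rough and untested sketch -- note that
uprobe_orig_insn() below is a made-up placeholder for however we end up
fetching the original instruction that an existing uprobe breakpoint
replaced:

        static int check_prev_word_for_prefix(struct mm_struct *mm, unsigned long addr)
        {
                u32 prev;

                if (get_instr(mm, addr - 4, &prev) < 0)
                        return -EINVAL;

                /*
                 * If the previous word already carries a uprobe breakpoint, the
                 * trap has overwritten the original instruction there, so look
                 * at the saved copy instead (uprobe_orig_insn() is hypothetical).
                 */
                if (is_trap(prev) && uprobe_orig_insn(mm, addr - 4, &prev) < 0)
                        return -EINVAL;

                /* primary opcode 1 => previous word is a prefix, addr is its suffix */
                if (ppc_inst_primary_opcode(ppc_inst(prev)) == 1)
                        return -EINVAL;

                return 0;
        }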

- Naveen



Re: [PATCH v2] powerpc/uprobes: Validation for prefixed instruction

2021-02-04 Thread Naveen N. Rao
On 2021/02/04 04:19PM, Ravi Bangoria wrote:
> 
> 
> On 2/4/21 4:17 PM, Ravi Bangoria wrote:
> > Don't allow Uprobe on 2nd word of a prefixed instruction. As per
> > ISA 3.1, prefixed instruction should not cross 64-byte boundary.
> > So don't allow Uprobe on such prefixed instruction as well.
> > 
> > There are two ways probed instruction is changed in mapped pages.
> > First, when Uprobe is activated, it searches for all the relevant
> > pages and replace instruction in them. In this case, if we notice
> > that probe is on the 2nd word of prefixed instruction, error out
> > directly. Second, when Uprobe is already active and user maps a
> > relevant page via mmap(), instruction is replaced via mmap() code
> > path. But because Uprobe is invalid, entire mmap() operation can
> > not be stopped. In this case just print an error and continue.
> 
> @mpe,
> 
> arch_uprobe_analyze_insn() can return early if
> cpu_has_feature(CPU_FTR_ARCH_31) is not set. But that will
> miss out a rare scenario of user running binary with prefixed
> instruction on p10 predecessors. Please let me know if I
> should add cpu_has_feature(CPU_FTR_ARCH_31) or not.

The check you are adding is very specific to prefixed instructions, so 
it makes sense to add a cpu feature check for v3.1.

On older processors, those are invalid instructions like any other. The 
instruction emulation infrastructure will refuse to emulate it and the 
instruction will be single stepped.

- Naveen


Re: [PATCH v2] powerpc/uprobes: Validation for prefixed instruction

2021-02-04 Thread Naveen N. Rao
On 2021/02/04 04:17PM, Ravi Bangoria wrote:
> Don't allow Uprobe on 2nd word of a prefixed instruction. As per
> ISA 3.1, prefixed instruction should not cross 64-byte boundary.
> So don't allow Uprobe on such prefixed instruction as well.
> 
> There are two ways probed instruction is changed in mapped pages.
> First, when Uprobe is activated, it searches for all the relevant
> pages and replace instruction in them. In this case, if we notice
> that probe is on the 2nd word of prefixed instruction, error out
> directly. Second, when Uprobe is already active and user maps a
> relevant page via mmap(), instruction is replaced via mmap() code
> path. But because Uprobe is invalid, entire mmap() operation can
> not be stopped. In this case just print an error and continue.
> 
> Signed-off-by: Ravi Bangoria 
> ---
> v1: 
> http://lore.kernel.org/r/20210119091234.76317-1-ravi.bango...@linux.ibm.com
> v1->v2:
>   - Instead of introducing new arch hook from verify_opcode(), use
> existing hook arch_uprobe_analyze_insn().
>   - Add explicit check for prefixed instruction crossing 64-byte
> boundary. If probe is on such instruction, throw an error.
> 
>  arch/powerpc/kernel/uprobes.c | 66 ++-
>  1 file changed, 65 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
> index e8a63713e655..485d19a2a31f 100644
> --- a/arch/powerpc/kernel/uprobes.c
> +++ b/arch/powerpc/kernel/uprobes.c
> @@ -7,6 +7,7 @@
>   * Adapted from the x86 port by Ananth N Mavinakayanahalli 
> 
>   */
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -28,6 +29,69 @@ bool is_trap_insn(uprobe_opcode_t *insn)
>   return (is_trap(*insn));
>  }
>  
> +#ifdef CONFIG_PPC64
> +static int get_instr(struct mm_struct *mm, unsigned long addr, u32 *instr)
> +{
> + struct page *page;
> + struct vm_area_struct *vma;
> + void *kaddr;
> + unsigned int gup_flags = FOLL_FORCE | FOLL_SPLIT_PMD;
> +
> + if (get_user_pages_remote(mm, addr, 1, gup_flags, &page, &vma, NULL) <= 
> 0)
> + return -EINVAL;
> +
> + kaddr = kmap_atomic(page);
> + *instr = *((u32 *)(kaddr + (addr & ~PAGE_MASK)));
> + kunmap_atomic(kaddr);
> + put_page(page);
> + return 0;
> +}
> +
> +static int validate_prefixed_instr(struct mm_struct *mm, unsigned long addr)
> +{
> + struct ppc_inst inst;
> + u32 prefix, suffix;
> +
> + /*
> +  * No need to check if addr is pointing to beginning of the
> +  * page. Even if probe is on a suffix of page-unaligned
> +  * prefixed instruction, hw will raise exception and kernel
> +  * will send SIGBUS.
> +  */
> + if (!(addr & ~PAGE_MASK))
> + return 0;
> +
> + if (get_instr(mm, addr, &prefix) < 0)
> + return -EINVAL;
> + if (get_instr(mm, addr + 4, &suffix) < 0)
> + return -EINVAL;
> +
> + inst = ppc_inst_prefix(prefix, suffix);
> + if (ppc_inst_prefixed(inst) && (addr & 0x3F) == 0x3C) {
> + printk_ratelimited("Cannot register a uprobe on 64 byte "
^^ pr_info_ratelimited()

It should be sufficient to check the primary opcode to determine if it 
is a prefixed instruction. You don't have to read the suffix. I see that 
we don't have a helper to do this currently, so you could do:

if (ppc_inst_primary_opcode(ppc_inst(prefix)) == 1)

In the future, it might be worthwhile to add IS_PREFIX() as a macro 
similar to IS_MTMSRD() if there are more such uses.

Along with this, if you also add the below to the start of this 
function, you can get rid of the #ifdef:

if (!IS_ENABLED(CONFIG_PPC64))
return 0;
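
Putting the two together, a rough and untested sketch of what I have in
mind for validate_prefixed_instr():

        static int validate_prefixed_instr(struct mm_struct *mm, unsigned long addr)
        {
                u32 prefix;

                if (!IS_ENABLED(CONFIG_PPC64))
                        return 0;

                /* only a prefix in the last word of a 64-byte block can cross it */
                if ((addr & 0x3F) != 0x3C)
                        return 0;

                if (get_instr(mm, addr, &prefix) < 0)
                        return -EINVAL;

                /* primary opcode 1 identifies a prefix; no need to read the suffix */
                if (ppc_inst_primary_opcode(ppc_inst(prefix)) == 1) {
                        pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n");
                        return -EINVAL;
                }

                return 0;
        }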


- Naveen



Re: [PATCH] kprobes: Warn if the kprobe is reregistered

2021-02-03 Thread Naveen N . Rao
On 2021/02/03 11:59PM, Masami Hiramatsu wrote:
> Warn if the kprobe is reregistered, since there must be
> a software bug (actively used resource must not be re-registered)
> and caller must be fixed.
> 
> Signed-off-by: Masami Hiramatsu 
> ---
>  kernel/kprobes.c |   13 -
>  1 file changed, 8 insertions(+), 5 deletions(-)

Suggested-by: Naveen N. Rao 
Acked-by: Naveen N. Rao 


Thanks,
Naveen



Re: [PATCH] selftests/ftrace: Disable multiple_kprobes test on powerpc

2021-01-05 Thread Naveen N. Rao

Masami Hiramatsu wrote:

On Tue, 5 Jan 2021 19:01:56 +0900
Masami Hiramatsu  wrote:


On Tue,  5 Jan 2021 12:27:30 +0530
"Naveen N. Rao"  wrote:

> Not all symbols are blacklisted on powerpc. Disable multiple_kprobes
> test until that is sorted, so that rest of ftrace and kprobe selftests
> can be run.

This looks good to me, but could you try to find the functions
which should be blocked from kprobes?
(Usually, the function which are involved in the sw-breakpoint
 handling, including locks etc.)


Ah, OK. 
I wonder why CONFIG_KPROBE_EVENTS_ON_NOTRACE=n doesn't help, it
was ignored if the arch doesn't support CONFIG_KPROBES_ON_FTRACE.


Good point, though we do support CONFIG_KPROBES_ON_FTRACE on powerpc so 
the below patch is unlikely to help. However, since entry code is 
unlikely to be the source of the issue due to 
CONFIG_KPROBE_EVENTS_ON_NOTRACE, I will take another look to see where 
the problem lies.




Naveen, could you try to run this test case with following patch
on powerpc?

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b911e9f6d9f5..241a55313476 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -433,7 +433,7 @@ static int disable_trace_kprobe(struct trace_event_call 
*call,
return 0;
 }

-#if defined(CONFIG_KPROBES_ON_FTRACE) && \
+#if defined(CONFIG_FUNCTION_TRACER) && \
!defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE)
 static bool __within_notrace_func(unsigned long addr)
 {



This looks like a good change regardless, so if you intend to post this 
separately:

Acked-by: Naveen N. Rao 

Thanks,
Naveen



Re: [PATCH] selftests/ftrace: Disable multiple_kprobes test on powerpc

2021-01-05 Thread Naveen N. Rao

Masami Hiramatsu wrote:

On Tue,  5 Jan 2021 12:27:30 +0530
"Naveen N. Rao"  wrote:


Not all symbols are blacklisted on powerpc. Disable multiple_kprobes
test until that is sorted, so that rest of ftrace and kprobe selftests
can be run.


This looks good to me, but could you try to find the functions
which should be blocked from kprobes?
(Usually, the function which are involved in the sw-breakpoint
 handling, including locks etc.)


Yes, we did add several blacklists some time back, but there has been 
quite a bit of churn in our entry code. I've been meaning to audit it 
for a while now, but this has been blocking tests. It would be nice to 
skip this test for now until I am able to spend some time on this.


Thanks,
Naveen



[PATCH] selftests/ftrace: Disable multiple_kprobes test on powerpc

2021-01-04 Thread Naveen N. Rao
Not all symbols are blacklisted on powerpc. Disable multiple_kprobes
test until that is sorted, so that rest of ftrace and kprobe selftests
can be run.

Signed-off-by: Naveen N. Rao 
---
 .../testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc 
b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 312d237800969e..41503c32f53eed 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -7,7 +7,7 @@
 # Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
 case `uname -m` in
   x86_64|i[3456]86) OFFS=5;;
-  ppc64le) OFFS=8;;
+  ppc*) exit_unsupported;;
   *) OFFS=0;;
 esac
 

base-commit: 36bbbd0e234d817938bdc52121a0f5473b3e58f5
-- 
2.25.4



Re: [PATCH 3/4] perf tools: Update powerpc's syscall.tbl

2020-12-23 Thread Naveen N. Rao

Arnaldo Carvalho de Melo wrote:

Em Fri, Dec 18, 2020 at 08:08:56PM +0530, Naveen N. Rao escreveu:

Hi Arnaldo,

Arnaldo Carvalho de Melo wrote:
> Em Fri, Dec 18, 2020 at 08:26:59AM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Fri, Dec 18, 2020 at 03:59:23PM +0800, Tiezhu Yang escreveu:
> > > This silences the following tools/perf/ build warning:
> > > Warning: Kernel ABI header at 
'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 
'arch/powerpc/kernel/syscalls/syscall.tbl'
> > 
> > Hi Ravi, Naveen,
> > 
> > 	Can I get your Reviewed-by or Acked-by for this change and the

> > other that adds s390's syscall.tbl to check_headers.sh so that we get
> 
> oops s/s390/powerpc/g :-)
> 
> > notified when the copy drifts, so that we can see if it still continues

> > working and we can get new syscalls to be supported in things like 'perf
> > trace'?

Yes, this looks good to me:
Reviewed-by: Naveen N. Rao 

FWIW, I had posted a similar patch back in April, but glad to have this go
in ;)
http://lkml.kernel.org/r/20200220063740.785913-1-naveen.n@linux.vnet.ibm.com


My bad :-\


No worries, thanks for checking on this one.

- Naveen



Re: [PATCH 3/4] perf tools: Update powerpc's syscall.tbl

2020-12-18 Thread Naveen N. Rao

Hi Arnaldo,

Arnaldo Carvalho de Melo wrote:

Em Fri, Dec 18, 2020 at 08:26:59AM -0300, Arnaldo Carvalho de Melo escreveu:

Em Fri, Dec 18, 2020 at 03:59:23PM +0800, Tiezhu Yang escreveu:
> This silences the following tools/perf/ build warning:
> Warning: Kernel ABI header at 
'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version 
at 'arch/powerpc/kernel/syscalls/syscall.tbl'

Hi Ravi, Naveen,

Can I get your Reviewed-by or Acked-by for this change and the
other that adds s390's syscall.tbl to check_headers.sh so that we get


oops s/s390/powerpc/g :-)


notified when the copy drifts, so that we can see if it still continues
working and we can get new syscalls to be supported in things like 'perf
trace'?


Yes, this looks good to me:
Reviewed-by: Naveen N. Rao 

FWIW, I had posted a similar patch back in April, but glad to have this 
go in ;)

http://lkml.kernel.org/r/20200220063740.785913-1-naveen.n@linux.vnet.ibm.com


Thanks,
Naveen



Re: [RFC PATCH 01/14] ftrace: Fix updating FTRACE_FL_TRAMP

2020-12-01 Thread Naveen N. Rao

Steven Rostedt wrote:

On Thu, 26 Nov 2020 23:38:38 +0530
"Naveen N. Rao"  wrote:


On powerpc, kprobe-direct.tc triggered FTRACE_WARN_ON() in
ftrace_get_addr_new() followed by the below message:
  Bad trampoline accounting at: 4222522f (wake_up_process+0xc/0x20) 
(f001)

The set of steps leading to this involved:
- modprobe ftrace-direct-too
- enable_probe
- modprobe ftrace-direct
- rmmod ftrace-direct <-- trigger

The problem turned out to be that we were not updating flags in the
ftrace record properly. From the above message about the trampoline
accounting being bad, it can be seen that the ftrace record still has
FTRACE_FL_TRAMP set though ftrace-direct module is going away. This
happens because we are checking if any ftrace_ops has the
FTRACE_FL_TRAMP flag set _before_ updating the filter hash.

The fix for this is to look for any _other_ ftrace_ops that also needs
FTRACE_FL_TRAMP.


I'm applying this now and sending this for -rc and stable.

The code worked on x86 because x86 has a way to make all users use
trampolines, so this was never an issue (everything has a trampoline).
I modified the kernel so that x86 would not create its own trampoline
(see the weak function arch_ftrace_update_trampoline()), and I was able
to reproduce the bug.


Good to know that you were able to reproduce this.



I'm adding:

Cc: sta...@vger.kernel.org
Fixes: a124692b698b0 ("ftrace: Enable trampoline when rec count returns back to 
one")


That looks good to me. Thanks for picking the two patches and for your 
review on the others!



- Naveen



[RFC PATCH 09/14] powerpc/ftrace: Use a hash table for tracking ftrace stubs

2020-11-26 Thread Naveen N. Rao
In preparation for having to deal with large number of ftrace stubs in
support of ftrace direct calls, convert existing stubs to use a hash
table. The hash table is keyed off the target address for the stubs
since there could be multiple stubs for the same target to cover the
full kernel text.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace.c | 75 +-
 1 file changed, 33 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 14b39f7797d455..7ddb6e4b527c39 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -13,6 +13,7 @@
 
 #define pr_fmt(fmt) "ftrace-powerpc: " fmt
 
+#include 
 #include 
 #include 
 #include 
@@ -32,14 +33,12 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
-/*
- * We generally only have a single long_branch tramp and at most 2 or 3 plt
- * tramps generated. But, we don't use the plt tramps currently. We also allot
- * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
- * tramps in total. Set aside 8 just to be sure.
- */
-#defineNUM_FTRACE_TRAMPS   8
-static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+static DEFINE_HASHTABLE(ppc_ftrace_stubs, 8);
+struct ppc_ftrace_stub_data {
+   unsigned long addr;
+   unsigned long target;
+   struct hlist_node hentry;
+};
 
 static struct ppc_inst
 ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
@@ -288,36 +287,31 @@ __ftrace_make_nop(struct module *mod,
 #endif /* PPC64 */
 #endif /* CONFIG_MODULES */
 
-static unsigned long find_ftrace_tramp(unsigned long ip)
+static unsigned long find_ftrace_tramp(unsigned long ip, unsigned long target)
 {
-   int i;
+   struct ppc_ftrace_stub_data *stub;
struct ppc_inst instr;
 
-   /*
-* We have the compiler generated long_branch tramps at the end
-* and we prefer those
-*/
-   for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
-   if (!ftrace_tramps[i])
-   continue;
-   else if (create_branch(&instr, (void *)ip,
-  ftrace_tramps[i], 0) == 0)
-   return ftrace_tramps[i];
+   hash_for_each_possible(ppc_ftrace_stubs, stub, hentry, target)
+   if (stub->target == target && !create_branch(&instr, (void 
*)ip, stub->addr, 0))
+   return stub->addr;
 
return 0;
 }
 
-static int add_ftrace_tramp(unsigned long tramp)
+static int add_ftrace_tramp(unsigned long tramp, unsigned long target)
 {
-   int i;
+   struct ppc_ftrace_stub_data *stub;
 
-   for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
-   if (!ftrace_tramps[i]) {
-   ftrace_tramps[i] = tramp;
-   return 0;
-   }
+   stub = kmalloc(sizeof(*stub), GFP_KERNEL);
+   if (!stub)
+   return -1;
 
-   return -1;
+   stub->addr = tramp;
+   stub->target = target;
+   hash_add(ppc_ftrace_stubs, &stub->hentry, target);
+
+   return 0;
 }
 
 /*
@@ -328,16 +322,14 @@ static int add_ftrace_tramp(unsigned long tramp)
  */
 static int setup_mcount_compiler_tramp(unsigned long tramp)
 {
-   int i;
struct ppc_inst op;
-   unsigned long ptr;
struct ppc_inst instr;
+   struct ppc_ftrace_stub_data *stub;
+   unsigned long ptr, ftrace_target = ppc_global_function_entry((void 
*)FTRACE_REGS_ADDR);
 
/* Is this a known long jump tramp? */
-   for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
-   if (!ftrace_tramps[i])
-   break;
-   else if (ftrace_tramps[i] == tramp)
+   hash_for_each_possible(ppc_ftrace_stubs, stub, hentry, ftrace_target)
+   if (stub->target == ftrace_target && stub->addr == tramp)
return 0;
 
/* New trampoline -- read where this goes */
@@ -361,19 +353,18 @@ static int setup_mcount_compiler_tramp(unsigned long 
tramp)
}
 
/* Let's re-write the tramp to go to ftrace_[regs_]caller */
-   ptr = ppc_global_function_entry((void *)FTRACE_REGS_ADDR);
-   if (create_branch(&instr, (void *)tramp, ptr, 0)) {
+   if (create_branch(&instr, (void *)tramp, ftrace_target, 0)) {
pr_debug("%ps is not reachable from existing mcount tramp\n",
-   (void *)ptr);
+   (void *)ftrace_target);
return -1;
}
 
-   if (patch_branch((struct ppc_inst *)tramp, ptr, 0)) {
+   if (patch_branch((struct ppc_inst *)tramp, ftrace_target, 0)) {
pr_debug("REL24 out of range!\n");
return -1;
}
 
-   if (add_ftrace_tramp(tramp)) {
+   if (add_ftrace_tramp(tramp, ftrace_target)) {
pr_debug("No tramp locations left\n");
   

[RFC PATCH 11/14] powerpc/ftrace: Use GPR save/restore macros in ftrace_graph_caller()

2020-11-26 Thread Naveen N. Rao
Use SAVE_8GPRS(), REST_8GPRS() and _NIP(), along with using the standard
SWITCH_FRAME_SIZE for the stack frame in ftrace_graph_caller() to
simplify code. This increases the stack frame size, but it is unlikely
to be an issue since ftrace_[regs_]caller() have just used a similar
stack frame size, and it isn't evident that the graph caller has too
deep a call stack to cause issues.

Signed-off-by: Naveen N. Rao 
---
 .../powerpc/kernel/trace/ftrace_64_mprofile.S | 28 +--
 1 file changed, 7 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S 
b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index f9fd5f743eba34..bbe871b47ade58 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -279,24 +279,17 @@ livepatch_handler:
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 _GLOBAL(ftrace_graph_caller)
-   stdur1, -112(r1)
+   stdur1,-SWITCH_FRAME_SIZE(r1)
/* with -mprofile-kernel, parameter regs are still alive at _mcount */
-   std r10, 104(r1)
-   std r9, 96(r1)
-   std r8, 88(r1)
-   std r7, 80(r1)
-   std r6, 72(r1)
-   std r5, 64(r1)
-   std r4, 56(r1)
-   std r3, 48(r1)
+   SAVE_8GPRS(3, r1)
 
/* Save callee's TOC in the ABI compliant location */
std r2, 24(r1)
ld  r2, PACATOC(r13)/* get kernel TOC in r2 */
 
-   addir5, r1, 112
+   addir5, r1, SWITCH_FRAME_SIZE
mfctr   r4  /* ftrace_caller has moved local addr here */
-   std r4, 40(r1)
+   std r4, _NIP(r1)
mflrr3  /* ftrace_caller has restored LR from stack */
subir4, r4, MCOUNT_INSN_SIZE
 
@@ -309,21 +302,14 @@ _GLOBAL(ftrace_graph_caller)
 */
mtlrr3
 
-   ld  r0, 40(r1)
+   ld  r0, _NIP(r1)
mtctr   r0
-   ld  r10, 104(r1)
-   ld  r9, 96(r1)
-   ld  r8, 88(r1)
-   ld  r7, 80(r1)
-   ld  r6, 72(r1)
-   ld  r5, 64(r1)
-   ld  r4, 56(r1)
-   ld  r3, 48(r1)
+   REST_8GPRS(3, r1)
 
/* Restore callee's TOC */
ld  r2, 24(r1)
 
-   addir1, r1, 112
+   addir1, r1, SWITCH_FRAME_SIZE
mflrr0
std r0, LRSAVE(r1)
bctr
-- 
2.25.4



[RFC PATCH 14/14] samples/ftrace: Add powerpc support for ftrace direct samples

2020-11-26 Thread Naveen N. Rao
Add a simple powerpc trampoline to demonstrate use of ftrace direct on
powerpc.

Signed-off-by: Naveen N. Rao 
---
 samples/Kconfig   |  2 +-
 samples/ftrace/ftrace-direct-modify.c | 58 +++
 samples/ftrace/ftrace-direct-too.c| 48 --
 samples/ftrace/ftrace-direct.c| 45 +++--
 4 files changed, 144 insertions(+), 9 deletions(-)

diff --git a/samples/Kconfig b/samples/Kconfig
index 0ed6e4d71d87b1..fdc9e44dba3b95 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -26,7 +26,7 @@ config SAMPLE_TRACE_PRINTK
 config SAMPLE_FTRACE_DIRECT
tristate "Build register_ftrace_direct() example"
depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m
-   depends on X86_64 # has x86_64 inlined asm
+   depends on X86_64 || PPC64 # has inlined asm
help
  This builds an ftrace direct function example
  that hooks to wake_up_process and prints the parameters.
diff --git a/samples/ftrace/ftrace-direct-modify.c 
b/samples/ftrace/ftrace-direct-modify.c
index c13a5bc5095bea..89d66a12d300e1 100644
--- a/samples/ftrace/ftrace-direct-modify.c
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -2,6 +2,7 @@
 #include 
 #include 
 #include 
+#include 
 
 void my_direct_func1(void)
 {
@@ -18,6 +19,7 @@ extern void my_tramp2(void *);
 
 static unsigned long my_ip = (unsigned long)schedule;
 
+#ifdef CONFIG_X86
 asm (
 "  .pushsection.text, \"ax\", @progbits\n"
 "  .type   my_tramp1, @function\n"
@@ -38,6 +40,58 @@ asm (
 "  .size   my_tramp2, .-my_tramp2\n"
 "  .popsection\n"
 );
+#elif CONFIG_PPC64
+asm (
+"  .pushsection.text, \"ax\", @progbits\n"
+"  .type   my_tramp1, @function\n"
+"  .global my_tramp1\n"
+"   my_tramp1:\n"
+"  std 0, 16(1)\n"
+"  stdu1, -480(1)\n"
+"  std 2, 24(1)\n"
+"  mflr7\n"
+"  std 7, 368(1)\n"
+"  bcl 20, 31, 1f\n"
+"1:mflr12\n"
+"  ld  2, (3f - 1b)(12)\n"
+"  bl  my_direct_func1\n"
+"  nop\n"
+"  ld  2, 24(1)\n"
+"  ld  7, 368(1)\n"
+"  mtctr   7\n"
+"  addi1, 1, 480\n"
+"  ld  0, 16(1)\n"
+"  mtlr0\n"
+"  bctr\n"
+"  .size   my_tramp1, .-my_tramp1\n"
+"  nop\n"
+"  .type   my_tramp2, @function\n"
+"  .global my_tramp2\n"
+"   my_tramp2:\n"
+"  std 0, 16(1)\n"
+"  stdu1, -480(1)\n"
+"  std 2, 24(1)\n"
+"  mflr7\n"
+"  std 7, 368(1)\n"
+"  bcl 20, 31, 2f\n"
+"2:mflr12\n"
+"  ld  2, (3f - 2b)(12)\n"
+"  bl  my_direct_func2\n"
+"  nop\n"
+"  ld  2, 24(1)\n"
+"  ld  7, 368(1)\n"
+"  mtctr   7\n"
+"  addi1, 1, 480\n"
+"  ld  0, 16(1)\n"
+"  mtlr0\n"
+"  bctr\n"
+"  .size   my_tramp2, .-my_tramp2\n"
+"3:\n"
+"  .quad   .TOC.@tocbase\n"
+"  .popsection\n"
+);
+#endif
+
 
 static unsigned long my_tramp = (unsigned long)my_tramp1;
 static unsigned long tramps[2] = {
@@ -72,6 +126,10 @@ static int __init ftrace_direct_init(void)
 {
int ret;
 
+#ifdef CONFIG_PPC64
+   my_ip = ppc_function_entry((void *)my_ip) + 4;
+#endif
+
ret = register_ftrace_direct(my_ip, my_tramp);
if (!ret)
simple_tsk = kthread_run(simple_thread, NULL, 
"event-sample-fn");
diff --git a/samples/ftrace/ftrace-direct-too.c 
b/samples/ftrace/ftrace-direct-too.c
index d5c5022be66429..9a82abecbe0dcc 100644
--- a/samples/ftrace/ftrace-direct-too.c
+++ b/samples/ftrace/ftrace-direct-too.c
@@ -3,6 +3,7 @@
 
 #include  /* for handle_mm_fault() */
 #include 
+#include 
 
 void my_direct_func(struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
@@ -13,6 +14,9 @@ void my_direct_func(struct vm_area_struct *vma,
 
 extern void my_tramp(void *);
 
+static unsigned long my_ip = (unsigned long)handle_mm_fault;
+
+#ifdef CONFIG_X86
 asm (
 "  .pushsection.text, \"ax\", @progbits\n"
 "  .type   my_tramp, @function\n"
@@ -31,18 +35,54 @@ asm (
 "  .size   my_tramp, .-my_tramp\n"
 "  .popsection\n"
 );
+#elif CONFIG_PPC64
+asm (
+"  .pushsection.text, \"ax\", @progbits\n"
+"  .type   my_tramp, @function\n

[RFC PATCH 10/14] powerpc/ftrace: Drop assumptions about ftrace trampoline target

2020-11-26 Thread Naveen N. Rao
We currently assume that ftrace locations are patched to go to either
ftrace_caller or ftrace_regs_caller. Drop this assumption in preparation
for supporting ftrace direct calls.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace.c | 107 +++--
 1 file changed, 86 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 7ddb6e4b527c39..fcb21a9756e456 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -322,14 +322,15 @@ static int add_ftrace_tramp(unsigned long tramp, unsigned 
long target)
  */
 static int setup_mcount_compiler_tramp(unsigned long tramp)
 {
+   int i;
struct ppc_inst op;
struct ppc_inst instr;
struct ppc_ftrace_stub_data *stub;
unsigned long ptr, ftrace_target = ppc_global_function_entry((void 
*)FTRACE_REGS_ADDR);
 
-   /* Is this a known long jump tramp? */
-   hash_for_each_possible(ppc_ftrace_stubs, stub, hentry, ftrace_target)
-   if (stub->target == ftrace_target && stub->addr == tramp)
+   /* Is this a known tramp? */
+   hash_for_each(ppc_ftrace_stubs, i, stub, hentry)
+   if (stub->addr == tramp)
return 0;
 
/* New trampoline -- read where this goes */
@@ -608,23 +609,16 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace 
*rec, unsigned long addr)
 {
struct ppc_inst op;
void *ip = (void *)rec->ip;
-   unsigned long tramp, entry, ptr;
+   unsigned long tramp, ptr;
 
-   /* Make sure we're being asked to patch branch to a known ftrace addr */
-   entry = ppc_global_function_entry((void *)ftrace_caller);
ptr = ppc_global_function_entry((void *)addr);
 
-   if (ptr != entry) {
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-   entry = ppc_global_function_entry((void *)ftrace_regs_caller);
-   if (ptr != entry) {
+   /* Make sure we branch to ftrace_regs_caller since we only setup stubs 
for that */
+   tramp = ppc_global_function_entry((void *)ftrace_caller);
+   if (ptr == tramp)
+   ptr = ppc_global_function_entry((void *)FTRACE_REGS_ADDR);
 #endif
-   pr_err("Unknown ftrace addr to patch: %ps\n", (void 
*)ptr);
-   return -EINVAL;
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-   }
-#endif
-   }
 
/* Make sure we have a nop */
if (probe_kernel_read_inst(&op, ip)) {
@@ -637,7 +631,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace 
*rec, unsigned long addr)
return -EINVAL;
}
 
-   tramp = find_ftrace_tramp((unsigned long)ip, FTRACE_REGS_ADDR);
+   tramp = find_ftrace_tramp((unsigned long)ip, ptr);
if (!tramp) {
pr_err("No ftrace trampolines reachable from %ps\n", ip);
return -EINVAL;
@@ -783,6 +777,81 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long 
old_addr,
 }
 #endif
 
+static int
+__ftrace_modify_call_kernel(struct dyn_ftrace *rec, unsigned long old_addr, 
unsigned long addr)
+{
+   struct ppc_inst op;
+   unsigned long ip = rec->ip;
+   unsigned long entry, ptr, tramp;
+
+   /* read where this goes */
+   if (probe_kernel_read_inst(&op, (void *)ip)) {
+   pr_err("Fetching opcode failed.\n");
+   return -EFAULT;
+   }
+
+   /* Make sure that this is still a 24bit jump */
+   if (!is_bl_op(op)) {
+   pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
+   return -EINVAL;
+   }
+
+   /* lets find where the pointer goes */
+   tramp = find_bl_target(ip, op);
+   entry = ppc_global_function_entry((void *)old_addr);
+
+   pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+   if (tramp != entry) {
+   /* old_addr is not within range, so we must have used a 
trampoline */
+   struct ppc_ftrace_stub_data *stub;
+
+   hash_for_each_possible(ppc_ftrace_stubs, stub, hentry, entry)
+   if (stub->target == entry && stub->addr == tramp)
+   break;
+
+   if (stub->target != entry || stub->addr != tramp) {
+   pr_err("we don't know about the tramp at %lx!\n", 
tramp);
+   return -EFAULT;
+   }
+   }
+
+   /* The new target may be within range */
+   if (test_24bit_addr(ip, addr)) {
+   /* within range */
+   if (patch_branch((struct ppc_inst *)ip, addr, BRANCH_SET_LINK)) 
{
+   pr_err("REL24 out of range!\n");
+   return -EINVAL;
+   }
+
+   return 0;
+   }
+
+   ptr = ppc_global_function_entry((void *)addr);
+
+#ifd

[RFC PATCH 13/14] powerpc/ftrace: Add support for register_ftrace_direct() for MPROFILE_KERNEL

2020-11-26 Thread Naveen N. Rao
Add support for register_ftrace_direct() for MPROFILE_KERNEL, as it
depends on DYNAMIC_FTRACE_WITH_REGS.

Since powerpc only provides a branch range of 32MB, we set aside a 64k
area within kernel text for creating stubs that can be used to branch to
the provided trampoline, which can be located in the module area. This
is limited to kernel text, and as such, ftrace direct calls are not
supported for functions in kernel modules at this time.

We use orig_gpr3 to stash the address of the direct call trampoline in
arch_ftrace_set_direct_caller().  ftrace_regs_caller() is updated to
check for this to determine if we need to redirect to a direct call
trampoline. As the direct call trampoline has to work as an alternative
for the ftrace trampoline, we setup LR and r0 appropriately, and update
ctr to the trampoline address.  Finally, ftrace_graph_caller() is
updated to save/restore r0.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/Kconfig  |   1 +
 arch/powerpc/include/asm/ftrace.h |  14 ++
 arch/powerpc/kernel/trace/ftrace.c| 140 +-
 .../powerpc/kernel/trace/ftrace_64_mprofile.S |  40 -
 4 files changed, 182 insertions(+), 13 deletions(-)
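
For reference, a comments-only sketch of what each stub in the 64k area
does (this just mirrors ftrace_direct_stub_insns[] in the diff below):

        /*
         * Each stub materializes the 64-bit trampoline address in r12 and
         * branches to it via CTR:
         *
         *      lis   r12, target@highest
         *      ori   r12, r12, target@higher
         *      sldi  r12, r12, 32
         *      oris  r12, r12, target@h
         *      ori   r12, r12, target@l
         *      mtctr r12
         *      bctr
         */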

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index cfc6dd787f532c..a87ac2e403196e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -197,6 +197,7 @@ config PPC
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
+   select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLSif MPROFILE_KERNEL
select HAVE_DYNAMIC_FTRACE_WITH_REGSif MPROFILE_KERNEL
select HAVE_EBPF_JITif PPC64
select HAVE_EFFICIENT_UNALIGNED_ACCESS  if !(CPU_LITTLE_ENDIAN && 
POWER7_CPU)
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index bc76970b6ee532..2f1c46e9f5d416 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -10,6 +10,8 @@
 
 #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 
+#define FTRACE_STUBS_SIZE  65536
+
 #ifdef __ASSEMBLY__
 
 /* Based off of objdump optput from glibc */
@@ -59,6 +61,18 @@ static inline unsigned long ftrace_call_adjust(unsigned long 
addr)
 struct dyn_arch_ftrace {
struct module *mod;
 };
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+/*
+ * When there is a direct caller registered, we use regs->orig_gpr3 (similar to
+ * how x86 uses orig_ax) to let ftrace_{regs_}_caller know that we should go
+ * there instead of returning to the function
+ */
+static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, 
unsigned long addr)
+{
+   regs->orig_gpr3 = addr;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
 #endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index fcb21a9756e456..815b14ae45a71f 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -37,6 +37,7 @@ static DEFINE_HASHTABLE(ppc_ftrace_stubs, 8);
 struct ppc_ftrace_stub_data {
unsigned long addr;
unsigned long target;
+   refcount_t refs;
struct hlist_node hentry;
 };
 
@@ -299,7 +300,7 @@ static unsigned long find_ftrace_tramp(unsigned long ip, 
unsigned long target)
return 0;
 }
 
-static int add_ftrace_tramp(unsigned long tramp, unsigned long target)
+static int add_ftrace_tramp(unsigned long tramp, unsigned long target, int 
lock)
 {
struct ppc_ftrace_stub_data *stub;
 
@@ -309,11 +310,123 @@ static int add_ftrace_tramp(unsigned long tramp, 
unsigned long target)
 
stub->addr = tramp;
stub->target = target;
+   refcount_set(&stub->refs, 1);
+   if (lock)
+   refcount_inc(&stub->refs);
+   hash_add(ppc_ftrace_stubs, &stub->hentry, target);
 
return 0;
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+static u32 ftrace_direct_stub_insns[] = {
+   PPC_RAW_LIS(12, 0),
+   PPC_RAW_ORI(12, 12, 0),
+   PPC_RAW_SLDI(12, 12, 32),
+   PPC_RAW_ORIS(12, 12, 0),
+   PPC_RAW_ORI(12, 12, 0),
+   PPC_RAW_MTCTR(12),
+   PPC_RAW_BCTR(),
+};
+#define FTRACE_NUM_STUBS   (FTRACE_STUBS_SIZE / 
sizeof(ftrace_direct_stub_insns))
+static DECLARE_BITMAP(stubs_bitmap, FTRACE_NUM_STUBS);
+extern unsigned int ftrace_stubs[];
+
+static unsigned long get_ftrace_tramp(unsigned long ip, unsigned long target)
+{
+   struct ppc_ftrace_stub_data *stub_data;
+   struct ppc_inst instr;
+   unsigned int *stub;
+   int index;
+
+   hash_for_each_possible(ppc_ftrace_stubs, stub_data, hentry, target) {
+   if (stub_data->target == target &&
+   !create_branch(&instr, (void *)ip, 
stub_data->addr, 0)) {
+   refcount_inc(&stub_data->refs);
+   return stub_data->addr;
+  

[RFC PATCH 12/14] powerpc/ftrace: Drop saving LR to stack save area for -mprofile-kernel

2020-11-26 Thread Naveen N. Rao
In -mprofile-kernel variant of ftrace_graph_caller(), we save the
optionally-updated LR address into the stack save area at the end. This
is likely an offshoot of the initial -mprofile-kernel implementation in
gcc emitting the same as part of the -mprofile-kernel instruction
sequence. However, this is not required. Drop it.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S 
b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index bbe871b47ade58..c5602e9b07faa3 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -310,7 +310,5 @@ _GLOBAL(ftrace_graph_caller)
ld  r2, 24(r1)
 
addir1, r1, SWITCH_FRAME_SIZE
-   mflrr0
-   std r0, LRSAVE(r1)
bctr
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-- 
2.25.4



[RFC PATCH 08/14] powerpc/ftrace: Use FTRACE_REGS_ADDR to identify the correct ftrace trampoline

2020-11-26 Thread Naveen N. Rao
Use FTRACE_REGS_ADDR instead of keying off
CONFIG_DYNAMIC_FTRACE_WITH_REGS to identify the proper ftrace trampoline
address to use.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 4fe5f373172fd2..14b39f7797d455 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -361,11 +361,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
}
 
/* Let's re-write the tramp to go to ftrace_[regs_]caller */
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-   ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
-#else
-   ptr = ppc_global_function_entry((void *)ftrace_caller);
-#endif
+   ptr = ppc_global_function_entry((void *)FTRACE_REGS_ADDR);
if (create_branch(&instr, (void *)tramp, ptr, 0)) {
pr_debug("%ps is not reachable from existing mcount tramp\n",
(void *)ptr);
@@ -885,11 +881,7 @@ int __init ftrace_dyn_arch_init(void)
0x7d8903a6, /* mtctr   r12  */
0x4e800420, /* bctr */
};
-#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-   unsigned long addr = ppc_global_function_entry((void 
*)ftrace_regs_caller);
-#else
-   unsigned long addr = ppc_global_function_entry((void *)ftrace_caller);
-#endif
+   unsigned long addr = ppc_global_function_entry((void 
*)FTRACE_REGS_ADDR);
long reladdr = addr - kernel_toc_addr();
 
if (reladdr > 0x7FFF || reladdr < -(0x8000L)) {
-- 
2.25.4



[RFC PATCH 07/14] powerpc/ftrace: Remove dead code

2020-11-26 Thread Naveen N. Rao
ftrace_plt_tramps[] was intended to speed up skipping plt branches, but
the code wasn't completed. It is also not significantly better than
reading and decoding the instruction. Remove the same.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 42761ebec9f755..4fe5f373172fd2 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -332,7 +332,6 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
struct ppc_inst op;
unsigned long ptr;
struct ppc_inst instr;
-   static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
 
/* Is this a known long jump tramp? */
for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
@@ -341,13 +340,6 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)
else if (ftrace_tramps[i] == tramp)
return 0;
 
-   /* Is this a known plt tramp? */
-   for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
-   if (!ftrace_plt_tramps[i])
-   break;
-   else if (ftrace_plt_tramps[i] == tramp)
-   return -1;
-
/* New trampoline -- read where this goes */
if (probe_kernel_read_inst(&op, (void *)tramp)) {
pr_debug("Fetching opcode failed.\n");
-- 
2.25.4



[RFC PATCH 06/14] powerpc: Add support for CONFIG_HAVE_FUNCTION_ARG_ACCESS_API

2020-11-26 Thread Naveen N. Rao
Add regs_get_kernel_argument() for a rudimentary way to access
kernel function arguments.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/include/asm/ptrace.h | 31 +++
 2 files changed, 32 insertions(+)
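
For context, an illustrative (untested) fragment showing how this is
expected to be used from a kprobe pre-handler; the handler name is
arbitrary:

        /* Print the first two arguments of the probed function. */
        static int handler_pre(struct kprobe *p, struct pt_regs *regs)
        {
                unsigned long arg0 = regs_get_kernel_argument(regs, 0);
                unsigned long arg1 = regs_get_kernel_argument(regs, 1);

                pr_info("%s: arg0=0x%lx arg1=0x%lx\n", p->symbol_name, arg0, arg1);
                return 0;
        }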

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe084929b..cfc6dd787f532c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -202,6 +202,7 @@ config PPC
select HAVE_EFFICIENT_UNALIGNED_ACCESS  if !(CPU_LITTLE_ENDIAN && 
POWER7_CPU)
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
+   select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
diff --git a/arch/powerpc/include/asm/ptrace.h 
b/arch/powerpc/include/asm/ptrace.h
index e2c778c176a3a6..956828c07abd70 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -62,6 +62,8 @@ struct pt_regs
 };
 #endif
 
+#define NR_REG_ARGUMENTS 8
+
 #ifdef __powerpc64__
 
 /*
@@ -85,8 +87,10 @@ struct pt_regs
 
 #ifdef PPC64_ELF_ABI_v2
 #define STACK_FRAME_MIN_SIZE   32
+#define STACK_FRAME_PARM_SAVE  32
 #else
 #define STACK_FRAME_MIN_SIZE   STACK_FRAME_OVERHEAD
+#define STACK_FRAME_PARM_SAVE  48
 #endif
 
 /* Size of dummy stack frame allocated when calling signal handler. */
@@ -103,6 +107,7 @@ struct pt_regs
 #define STACK_INT_FRAME_SIZE   (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
 #define STACK_FRAME_MARKER 2
 #define STACK_FRAME_MIN_SIZE   STACK_FRAME_OVERHEAD
+#define STACK_FRAME_PARM_SAVE  8
 
 /* Size of stack frame allocated when calling signal handler. */
 #define __SIGNAL_FRAMESIZE 64
@@ -309,6 +314,32 @@ static inline unsigned long 
regs_get_kernel_stack_nth(struct pt_regs *regs,
return 0;
 }
 
+/**
+ * regs_get_kernel_argument() - get Nth function argument in kernel
+ * @regs:  pt_regs of that context
+ * @n: function argument number (start from 0)
+ *
+ * regs_get_argument() returns @n th argument of the function call.
+ * Note that this chooses most probable assignment, and is incorrect
+ * in scenarios where double or fp/vector parameters are involved.
+ * This also doesn't take into account stack alignment requirements.
+ *
+ * This is expected to be called from kprobes or ftrace with regs
+ * at function entry, so the current function has not setup its stack.
+ */
+static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
+unsigned int n)
+{
+   if (n >= NR_REG_ARGUMENTS) {
+#ifndef __powerpc64__
+   n -= NR_REG_ARGUMENTS;
+#endif
+   n += STACK_FRAME_PARM_SAVE / sizeof(unsigned long);
+   return regs_get_kernel_stack_nth(regs, n);
+   } else {
+   return regs_get_register(regs, offsetof(struct pt_regs, gpr[n + 
3]));
+   }
+}
 #endif /* __ASSEMBLY__ */
 
 #ifndef __powerpc64__
-- 
2.25.4



[RFC PATCH 03/14] ftrace: Fix cleanup in error path of register_ftrace_direct()

2020-11-26 Thread Naveen N. Rao
We need to remove the hash entry if register_ftrace_function() fails.
Consolidate the cleanup to be done after register_ftrace_function() at
the end.

Signed-off-by: Naveen N. Rao 
---
 kernel/trace/ftrace.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9c1bba8cc51b03..3844a4a1346a9c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5136,8 +5136,6 @@ int register_ftrace_direct(unsigned long ip, unsigned 
long addr)
__add_hash_entry(direct_functions, entry);
 
ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
-   if (ret)
-   remove_hash_entry(direct_functions, entry);
 
if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
ret = register_ftrace_function(&direct_ops);
@@ -5146,6 +5144,7 @@ int register_ftrace_direct(unsigned long ip, unsigned 
long addr)
}
 
if (ret) {
+   remove_hash_entry(direct_functions, entry);
kfree(entry);
if (!direct->count) {
list_del_rcu(&direct->next);
-- 
2.25.4



[RFC PATCH 02/14] ftrace: Fix DYNAMIC_FTRACE_WITH_DIRECT_CALLS dependency

2020-11-26 Thread Naveen N. Rao
DYNAMIC_FTRACE_WITH_DIRECT_CALLS should depend on
DYNAMIC_FTRACE_WITH_REGS since we need ftrace_regs_caller().

Fixes: 763e34e74bb7d5c ("ftrace: Add register_ftrace_direct()")
Signed-off-by: Naveen N. Rao 
---
 kernel/trace/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a4020c0b4508c9..e1bf5228fb692a 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -202,7 +202,7 @@ config DYNAMIC_FTRACE_WITH_REGS
 
 config DYNAMIC_FTRACE_WITH_DIRECT_CALLS
def_bool y
-   depends on DYNAMIC_FTRACE
+   depends on DYNAMIC_FTRACE_WITH_REGS
depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 
 config FUNCTION_PROFILER
-- 
2.25.4



[RFC PATCH 05/14] ftrace: Add architectural helpers for [un]register_ftrace_direct()

2020-11-26 Thread Naveen N. Rao
Architectures may want to do some validation (such as to ensure that the
trampoline code is reachable from the provided ftrace location) before
accepting ftrace direct registration. Add helpers for the same.
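
As a rough illustration of the kind of check an architecture might plug in --
the branch-range limit and error code below are assumptions for the sketch,
not an actual implementation from this series:

  int arch_register_ftrace_direct(unsigned long ip, unsigned long addr)
  {
  	long offset = (long)addr - (long)ip;

  	/* e.g. reject trampolines outside a +/- 32MB relative branch range */
  	if (offset < -0x2000000L || offset >= 0x2000000L)
  		return -ERANGE;

  	return 0;
  }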

Signed-off-by: Naveen N. Rao 
---
 include/linux/ftrace.h |  2 ++
 kernel/trace/ftrace.c  | 27 +--
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 46b4b7ee28c41f..3fdcb4c513bc2d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -290,6 +290,8 @@ int ftrace_modify_direct_caller(struct ftrace_func_entry 
*entry,
unsigned long old_addr,
unsigned long new_addr);
 unsigned long ftrace_find_rec_direct(unsigned long ip);
+int arch_register_ftrace_direct(unsigned long ip, unsigned long addr);
+void arch_unregister_ftrace_direct(unsigned long ip, unsigned long addr);
 #else
 # define ftrace_direct_func_count 0
 static inline int register_ftrace_direct(unsigned long ip, unsigned long addr)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7476f2458b6d95..0e259b90527722 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5005,6 +5005,13 @@ ftrace_set_addr(struct ftrace_ops *ops, unsigned long 
ip, int remove,
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+int __weak arch_register_ftrace_direct(unsigned long ip, unsigned long addr)
+{
+   return 0;
+}
+
+void __weak arch_unregister_ftrace_direct(unsigned long ip, unsigned long 
addr) { }
+
 /**
  * register_ftrace_direct - Call a custom trampoline directly
  * @ip: The address of the nop at the beginning of a function
@@ -5028,6 +5035,7 @@ int register_ftrace_direct(unsigned long ip, unsigned 
long addr)
struct ftrace_hash *free_hash = NULL;
struct dyn_ftrace *rec;
int ret = -EBUSY;
+   int arch_ret;
 
mutex_lock(&direct_mutex);
 
@@ -5082,18 +5090,24 @@ int register_ftrace_direct(unsigned long ip, unsigned 
long addr)
entry->direct = addr;
__add_hash_entry(direct_functions, entry);
 
-   ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
+   arch_ret = arch_register_ftrace_direct(ip, addr);
 
-   if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
-   ret = register_ftrace_function(&direct_ops);
-   if (ret)
-   ftrace_set_filter_ip(&direct_ops, ip, 1, 0);
+   if (!arch_ret) {
+   ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0);
+
+   if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) {
+   ret = register_ftrace_function(&direct_ops);
+   if (ret)
+   ftrace_set_filter_ip(&direct_ops, ip, 1, 0);
+   }
}
 
-   if (ret) {
+   if (arch_ret || ret) {
remove_hash_entry(direct_functions, entry);
ftrace_direct_func_count--;
kfree(entry);
+   if (!arch_ret)
+   arch_unregister_ftrace_direct(ip, addr);
}
  out_unlock:
mutex_unlock(&direct_mutex);
@@ -5155,6 +5169,7 @@ int unregister_ftrace_direct(unsigned long ip, unsigned 
long addr)
remove_hash_entry(direct_functions, entry);
ftrace_direct_func_count--;
kfree(entry);
+   arch_unregister_ftrace_direct(ip, addr);
  out_unlock:
mutex_unlock(&direct_mutex);
 
-- 
2.25.4



[RFC PATCH 04/14] ftrace: Remove ftrace_find_direct_func()

2020-11-26 Thread Naveen N. Rao
This is a revert of commit 013bf0da047481 ("ftrace: Add
ftrace_find_direct_func()")

ftrace_find_direct_func() was meant for use in the function graph tracer
by architecture specific code. However, commit ff205766dbbee0 ("ftrace:
Fix function_graph tracer interaction with BPF trampoline") disabled
the function graph tracer for direct calls, leaving this without any users.

In addition, modify_ftrace_direct() allowed redirecting the direct call
to a different trampoline that was never registered through
register_ftrace_direct(). This meant that ftrace_direct_funcs didn't
capture all trampolines.

Signed-off-by: Naveen N. Rao 
---
 include/linux/ftrace.h |  5 ---
 kernel/trace/ftrace.c  | 84 ++
 2 files changed, 4 insertions(+), 85 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 1bd3a0356ae478..46b4b7ee28c41f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -285,7 +285,6 @@ extern int ftrace_direct_func_count;
 int register_ftrace_direct(unsigned long ip, unsigned long addr);
 int unregister_ftrace_direct(unsigned long ip, unsigned long addr);
 int modify_ftrace_direct(unsigned long ip, unsigned long old_addr, unsigned 
long new_addr);
-struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr);
 int ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
struct dyn_ftrace *rec,
unsigned long old_addr,
@@ -306,10 +305,6 @@ static inline int modify_ftrace_direct(unsigned long ip,
 {
return -ENOTSUPP;
 }
-static inline struct ftrace_direct_func *ftrace_find_direct_func(unsigned long 
addr)
-{
-   return NULL;
-}
 static inline int ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
  struct dyn_ftrace *rec,
  unsigned long old_addr,
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3844a4a1346a9c..7476f2458b6d95 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5005,46 +5005,6 @@ ftrace_set_addr(struct ftrace_ops *ops, unsigned long 
ip, int remove,
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
-
-struct ftrace_direct_func {
-   struct list_headnext;
-   unsigned long   addr;
-   int count;
-};
-
-static LIST_HEAD(ftrace_direct_funcs);
-
-/**
- * ftrace_find_direct_func - test an address if it is a registered direct 
caller
- * @addr: The address of a registered direct caller
- *
- * This searches to see if a ftrace direct caller has been registered
- * at a specific address, and if so, it returns a descriptor for it.
- *
- * This can be used by architecture code to see if an address is
- * a direct caller (trampoline) attached to a fentry/mcount location.
- * This is useful for the function_graph tracer, as it may need to
- * do adjustments if it traced a location that also has a direct
- * trampoline attached to it.
- */
-struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr)
-{
-   struct ftrace_direct_func *entry;
-   bool found = false;
-
-   /* May be called by fgraph trampoline (protected by rcu tasks) */
-   list_for_each_entry_rcu(entry, &ftrace_direct_funcs, next) {
-   if (entry->addr == addr) {
-   found = true;
-   break;
-   }
-   }
-   if (found)
-   return entry;
-
-   return NULL;
-}
-
 /**
  * register_ftrace_direct - Call a custom trampoline directly
  * @ip: The address of the nop at the beginning of a function
@@ -5064,7 +5024,6 @@ struct ftrace_direct_func 
*ftrace_find_direct_func(unsigned long addr)
  */
 int register_ftrace_direct(unsigned long ip, unsigned long addr)
 {
-   struct ftrace_direct_func *direct;
struct ftrace_func_entry *entry;
struct ftrace_hash *free_hash = NULL;
struct dyn_ftrace *rec;
@@ -5118,19 +5077,7 @@ int register_ftrace_direct(unsigned long ip, unsigned 
long addr)
if (!entry)
goto out_unlock;
 
-   direct = ftrace_find_direct_func(addr);
-   if (!direct) {
-   direct = kmalloc(sizeof(*direct), GFP_KERNEL);
-   if (!direct) {
-   kfree(entry);
-   goto out_unlock;
-   }
-   direct->addr = addr;
-   direct->count = 0;
-   list_add_rcu(&direct->next, &ftrace_direct_funcs);
-   ftrace_direct_func_count++;
-   }
-
+   ftrace_direct_func_count++;
entry->ip = ip;
entry->direct = addr;
__add_hash_entry(direct_functions, entry);
@@ -5145,18 +5092,8 @@ int register_ftrace_direct(unsigned long ip, unsigned 
long addr)
 
if (ret) {
remove_hash_entry(direct_functions, entry);
+   ftrace_direct_func_count--;
kfree(entr

[RFC PATCH 01/14] ftrace: Fix updating FTRACE_FL_TRAMP

2020-11-26 Thread Naveen N. Rao
On powerpc, kprobe-direct.tc triggered FTRACE_WARN_ON() in
ftrace_get_addr_new() followed by the below message:
  Bad trampoline accounting at: 4222522f (wake_up_process+0xc/0x20) 
(f001)

The set of steps leading to this involved:
- modprobe ftrace-direct-too
- enable_probe
- modprobe ftrace-direct
- rmmod ftrace-direct <-- trigger

The problem turned out to be that we were not updating flags in the
ftrace record properly. From the above message about the trampoline
accounting being bad, it can be seen that the ftrace record still has
FTRACE_FL_TRAMP set though ftrace-direct module is going away. This
happens because we are checking if any ftrace_ops has the
FTRACE_FL_TRAMP flag set _before_ updating the filter hash.

The fix for this is to look for any _other_ ftrace_ops that also needs
FTRACE_FL_TRAMP.

Signed-off-by: Naveen N. Rao 
---
 kernel/trace/ftrace.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8185f7240095f4..9c1bba8cc51b03 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1629,6 +1629,8 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace 
*rec)
 static struct ftrace_ops *
 ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
 static struct ftrace_ops *
+ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops 
*op_exclude);
+static struct ftrace_ops *
 ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
 
 static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
@@ -1778,7 +1780,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops 
*ops,
 * to it.
 */
if (ftrace_rec_count(rec) == 1 &&
-   ftrace_find_tramp_ops_any(rec))
+   ftrace_find_tramp_ops_any_other(rec, ops))
rec->flags |= FTRACE_FL_TRAMP;
else
rec->flags &= ~FTRACE_FL_TRAMP;
@@ -2244,6 +2246,24 @@ ftrace_find_tramp_ops_any(struct dyn_ftrace *rec)
return NULL;
 }
 
+static struct ftrace_ops *
+ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops 
*op_exclude)
+{
+   struct ftrace_ops *op;
+   unsigned long ip = rec->ip;
+
+   do_for_each_ftrace_op(op, ftrace_ops_list) {
+
+   if (op == op_exclude || !op->trampoline)
+   continue;
+
+   if (hash_contains_ip(ip, op->func_hash))
+   return op;
+   } while_for_each_ftrace_op(op);
+
+   return NULL;
+}
+
 static struct ftrace_ops *
 ftrace_find_tramp_ops_next(struct dyn_ftrace *rec,
   struct ftrace_ops *op)
-- 
2.25.4



[RFC PATCH 00/14] powerpc64: Add support for ftrace direct calls

2020-11-26 Thread Naveen N. Rao
This series adds support for DYNAMIC_FTRACE_WITH_DIRECT_CALLS for 
powerpc64le.

This is mostly working fine for me, except for a soft lockup I see with 
the ftrace direct selftest. It happens when irqsoff tracer is being 
tested with the ftrace direct modules. This appears to be an existing 
upstream issue since I am able to reproduce the lockup without these 
patches. I will be looking into that to see if I can figure out the 
cause of those lockups.

In the meantime, I would appreciate a review of these patches.
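
For readers unfamiliar with the interface, a consumer looks roughly like the
below -- modeled loosely on samples/ftrace/ftrace-direct.c, with the
arch-specific trampoline body left out as an assumption:

  #include <linux/module.h>
  #include <linux/ftrace.h>
  #include <linux/sched.h>

  extern void my_tramp(void);	/* arch-specific asm trampoline, not shown */

  static int __init ftrace_direct_init(void)
  {
  	/* ip: the ftrace (mcount) site at the start of the traced function */
  	return register_ftrace_direct((unsigned long)wake_up_process,
  				      (unsigned long)my_tramp);
  }

  static void __exit ftrace_direct_exit(void)
  {
  	unregister_ftrace_direct((unsigned long)wake_up_process,
  				 (unsigned long)my_tramp);
  }

  module_init(ftrace_direct_init);
  module_exit(ftrace_direct_exit);
  MODULE_LICENSE("GPL");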


- Naveen


Naveen N. Rao (14):
  ftrace: Fix updating FTRACE_FL_TRAMP
  ftrace: Fix DYNAMIC_FTRACE_WITH_DIRECT_CALLS dependency
  ftrace: Fix cleanup in error path of register_ftrace_direct()
  ftrace: Remove ftrace_find_direct_func()
  ftrace: Add architectural helpers for [un]register_ftrace_direct()
  powerpc: Add support for CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
  powerpc/ftrace: Remove dead code
  powerpc/ftrace: Use FTRACE_REGS_ADDR to identify the correct ftrace
trampoline
  powerpc/ftrace: Use a hash table for tracking ftrace stubs
  powerpc/ftrace: Drop assumptions about ftrace trampoline target
  powerpc/ftrace: Use GPR save/restore macros in ftrace_graph_caller()
  powerpc/ftrace: Drop saving LR to stack save area for -mprofile-kernel
  powerpc/ftrace: Add support for register_ftrace_direct() for
MPROFILE_KERNEL
  samples/ftrace: Add powerpc support for ftrace direct samples

 arch/powerpc/Kconfig  |   2 +
 arch/powerpc/include/asm/ftrace.h |  14 +
 arch/powerpc/include/asm/ptrace.h |  31 ++
 arch/powerpc/kernel/trace/ftrace.c| 314 +-
 .../powerpc/kernel/trace/ftrace_64_mprofile.S |  70 ++--
 include/linux/ftrace.h|   7 +-
 kernel/trace/Kconfig  |   2 +-
 kernel/trace/ftrace.c | 130 +++-
 samples/Kconfig   |   2 +-
 samples/ftrace/ftrace-direct-modify.c |  58 
 samples/ftrace/ftrace-direct-too.c|  48 ++-
 samples/ftrace/ftrace-direct.c|  45 ++-
 12 files changed, 519 insertions(+), 204 deletions(-)


base-commit: 4c202167192a77481310a3cacae9f12618b92216
-- 
2.25.4



Re: [PATCH 1/3 v5] ftrace: Have the callbacks receive a struct ftrace_regs instead of pt_regs

2020-11-12 Thread Naveen N. Rao

Hi Steven,

Steven Rostedt wrote:

From: "Steven Rostedt (VMware)" 

In preparation to have arguments of a function passed to callbacks attached
to functions as default, change the default callback prototype to receive a
struct ftrace_regs as the fourth parameter instead of a pt_regs.

For callbacks that set the FL_SAVE_REGS flag in their ftrace_ops flags, they
will now need to get the pt_regs via a ftrace_get_regs() helper call. If
this is called by a callback whose ftrace_ops did not have the
FL_SAVE_REGS flag set, then that helper function will return NULL.

This will allow the ftrace_regs to hold enough just to get the parameters
and stack pointer, but without the worry that callbacks may have a pt_regs
that is not completely filled.

Reviewed-by: Masami Hiramatsu 
Signed-off-by: Steven Rostedt (VMware) 
---
 arch/x86/kernel/kprobes/ftrace.c  |  3 ++-
 fs/pstore/ftrace.c|  2 +-
 include/linux/ftrace.h| 16 ++--
 include/linux/kprobes.h   |  2 +-
 kernel/livepatch/patch.c  |  3 ++-
 kernel/trace/ftrace.c | 27 +++
 kernel/trace/trace_event_perf.c   |  2 +-
 kernel/trace/trace_events.c   |  2 +-
 kernel/trace/trace_functions.c|  9 -
 kernel/trace/trace_irqsoff.c  |  2 +-
 kernel/trace/trace_sched_wakeup.c |  2 +-
 kernel/trace/trace_selftest.c | 20 +++-
 kernel/trace/trace_stack.c|  2 +-
 13 files changed, 55 insertions(+), 37 deletions(-)






diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 629abaf25681..be73350955e4 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -345,7 +345,7 @@ static inline void wait_for_kprobe_optimizer(void) 
{ }

 #endif /* CONFIG_OPTPROBES */
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct pt_regs *regs);
+ struct ftrace_ops *ops, struct ftrace_regs 
*fregs);


This breaks the build on non-x86 architectures that support 
KPROBES_ON_FTRACE.
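
For example, each such architecture needs a follow-up along these lines (a
sketch of the powerpc side from memory, not a hunk from this series):

  void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
  			   struct ftrace_ops *ops, struct ftrace_regs *fregs)
  {
  	struct pt_regs *regs = ftrace_get_regs(fregs);

  	if (!regs)	/* ops did not request FTRACE_OPS_FL_SAVE_REGS */
  		return;

  	/* ... existing kprobe handling continues to operate on regs ... */
  }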


- Naveen


Re: [PATCH v3 8/9] perf mem: Return NULL for event 'ldst' on PowerPC

2020-10-28 Thread Naveen N. Rao

[+ Maddy]

Leo Yan wrote:

If user specifies event type "ldst", PowerPC's perf_mem_events__name()
will wrongly return the store event name "cpu/mem-stores/".

This patch changes to return NULL for the event "ldst" on PowerPC.

Signed-off-by: Leo Yan 
---
 tools/perf/arch/powerpc/util/mem-events.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/mem-events.c 
b/tools/perf/arch/powerpc/util/mem-events.c
index 07fb5e049488..90c5a0760685 100644
--- a/tools/perf/arch/powerpc/util/mem-events.c
+++ b/tools/perf/arch/powerpc/util/mem-events.c
@@ -7,6 +7,8 @@ char *perf_mem_events__name(int i)
 {
if (i == PERF_MEM_EVENTS__LOAD)
return (char *) "cpu/mem-loads/";
-
-   return (char *) "cpu/mem-stores/";
+   else if (i == PERF_MEM_EVENTS__STORE)
+   return (char *) "cpu/mem-stores/";
+   else
+   return NULL;
 }
--
2.17.1




Re: [PATCH v1] powerpc/process: Remove unnecessary #ifdef CONFIG_FUNCTION_GRAPH_TRACER

2020-08-17 Thread Naveen N. Rao

Christophe Leroy wrote:

ftrace_graph_ret_addr() is always defined and returns 'ip' when
CONFIG_FUNCTION_GRAPH_TRACER is not set.

So the #ifdef is not needed, remove it.
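
For reference, the !CONFIG_FUNCTION_GRAPH_TRACER stub in include/linux/ftrace.h
looks roughly like this (reproduced from memory):

  static inline unsigned long
  ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret,
  		      unsigned long *retp)
  {
  	return ret;
  }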

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/process.c | 4 
 1 file changed, 4 deletions(-)


LGTM.
Acked-by: Naveen N. Rao 

- Naveen



Re: [PATCH] MAINTAINERS: adjust kprobes.rst entry to new location

2020-07-26 Thread Naveen N. Rao

Lukas Bulwahn wrote:

Commit 2165b82fde82 ("docs: Move kprobes.rst from staging/ to trace/")
moved kprobes.rst, but missed adjusting the MAINTAINERS entry.

Hence, ./scripts/get_maintainer.pl --self-test=patterns complains:

  warning: no file matches    F:    Documentation/staging/kprobes.rst

Adjust the entry to the new file location.

Signed-off-by: Lukas Bulwahn 
---
Naveen, Masami-san, please ack.
Jonathan, please pick this minor non-urgent patch into docs-next.

applies cleanly on next-20200724


Ah, sorry. Hadn't noticed this change from Mauro.
Acked-by: Naveen N. Rao 

Thanks!
- Naveen



[PATCH 2/3] docs: staging/kprobes.rst: Move references to a separate appendix

2020-07-21 Thread Naveen N. Rao
Kprobes references are currently listed right after the kretprobes example,
and appear to be part of the same section. Move this out to a separate
appendix for clarity.

Signed-off-by: Naveen N. Rao 
---
 Documentation/staging/kprobes.rst | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/Documentation/staging/kprobes.rst 
b/Documentation/staging/kprobes.rst
index 620699f5df22..b757b6dfd3d4 100644
--- a/Documentation/staging/kprobes.rst
+++ b/Documentation/staging/kprobes.rst
@@ -20,6 +20,7 @@ Kernel Probes (Kprobes)
   10. Deprecated Features
   Appendix A: The kprobes debugfs interface
   Appendix B: The kprobes sysctl interface
+  Appendix C: References
 
 Concepts: Kprobes and Return Probes
 =
@@ -710,11 +711,6 @@ Kretprobes Example
 
 See samples/kprobes/kretprobe_example.c
 
-For additional information on Kprobes, refer to the following URLs:
-
-- https://www.ibm.com/developerworks/library/l-kprobes/index.html
-- https://www.kernel.org/doc/ols/2006/ols2006v2-pages-109-124.pdf
-
 Deprecated Features
 ===
 
@@ -797,3 +793,11 @@ Note that this knob *changes* the optimized state. This 
means that optimized
 probes (marked [OPTIMIZED]) will be unoptimized ([OPTIMIZED] tag will be
 removed). If the knob is turned on, they will be optimized again.
 
+References
+==
+
+For additional information on Kprobes, refer to the following URLs:
+
+- https://www.ibm.com/developerworks/library/l-kprobes/index.html
+- https://www.kernel.org/doc/ols/2006/ols2006v2-pages-109-124.pdf
+
-- 
2.25.4



[PATCH 3/3] docs: Move kprobes.rst from staging/ to trace/

2020-07-21 Thread Naveen N. Rao
Kprobes constitutes a dynamic tracing technology and as such can be
moved alongside documentation of other tracing technologies.

Signed-off-by: Naveen N. Rao 
---
 Documentation/staging/index.rst  | 1 -
 Documentation/trace/index.rst| 1 +
 Documentation/{staging => trace}/kprobes.rst | 0
 3 files changed, 1 insertion(+), 1 deletion(-)
 rename Documentation/{staging => trace}/kprobes.rst (100%)

diff --git a/Documentation/staging/index.rst b/Documentation/staging/index.rst
index 184e6aece0a7..abd0d18254d2 100644
--- a/Documentation/staging/index.rst
+++ b/Documentation/staging/index.rst
@@ -7,7 +7,6 @@ Unsorted Documentation
:maxdepth: 2
 
crc32
-   kprobes
lzo
remoteproc
rpmsg
diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst
index 7d83156c9ac1..879ebb9f647d 100644
--- a/Documentation/trace/index.rst
+++ b/Documentation/trace/index.rst
@@ -9,6 +9,7 @@ Linux Tracing Technologies
tracepoint-analysis
ftrace
ftrace-uses
+   kprobes
kprobetrace
uprobetracer
tracepoints
diff --git a/Documentation/staging/kprobes.rst b/Documentation/trace/kprobes.rst
similarity index 100%
rename from Documentation/staging/kprobes.rst
rename to Documentation/trace/kprobes.rst
-- 
2.25.4



[PATCH 0/3] docs: kprobes: Update URLs and move under trace/

2020-07-21 Thread Naveen N. Rao
This series updates some of the URLs in the kprobes document and moves 
the same under trace/ directory.

- Naveen


Naveen N. Rao (3):
  docs: staging/kprobes.rst: Update some of the references
  docs: staging/kprobes.rst: Move references to a separate appendix
  docs: Move kprobes.rst from staging/ to trace/

 Documentation/staging/index.rst  |  1 -
 Documentation/trace/index.rst|  1 +
 Documentation/{staging => trace}/kprobes.rst | 16 +---
 3 files changed, 10 insertions(+), 8 deletions(-)
 rename Documentation/{staging => trace}/kprobes.rst (99%)


base-commit: f33d4075e512274c0c6edcd7602da2cf1d5a0341
-- 
2.25.4



[PATCH 1/3] docs: staging/kprobes.rst: Update some of the references

2020-07-21 Thread Naveen N. Rao
Some of the kprobes references are not valid anymore. Update the URLs to
point to their changed locations, where appropriate. Drop two URLs which
do not exist anymore.

Reported-by: Masami Hiramatsu 
Signed-off-by: Naveen N. Rao 
---
 Documentation/staging/kprobes.rst | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Documentation/staging/kprobes.rst 
b/Documentation/staging/kprobes.rst
index 8baab8832c5b..620699f5df22 100644
--- a/Documentation/staging/kprobes.rst
+++ b/Documentation/staging/kprobes.rst
@@ -712,10 +712,8 @@ See samples/kprobes/kretprobe_example.c
 
 For additional information on Kprobes, refer to the following URLs:
 
-- 
http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe
-- http://www.redhat.com/magazine/005mar05/features/kprobes/
-- http://www-users.cs.umn.edu/~boutcher/kprobes/
-- http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115)
+- https://www.ibm.com/developerworks/library/l-kprobes/index.html
+- https://www.kernel.org/doc/ols/2006/ols2006v2-pages-109-124.pdf
 
 Deprecated Features
 ===
-- 
2.25.4



Re: [PATCH] Replace HTTP links with HTTPS ones: kprobes

2020-07-21 Thread Naveen N. Rao

Masami Hiramatsu wrote:

On Tue, 14 Jul 2020 00:02:49 +0200
"Alexander A. Klimov"  wrote:




Am 13.07.20 um 16:20 schrieb Masami Hiramatsu:
> Hi Naveen and Alexander,
> 
> On Fri, 10 Jul 2020 19:14:47 +0530

> "Naveen N. Rao"  wrote:
> 
>> Masami Hiramatsu wrote:

>>> On Tue,  7 Jul 2020 21:49:59 +0200
>>> "Alexander A. Klimov"  wrote:
>>>
>>>> Rationale:
>>>> Reduces attack surface on kernel devs opening the links for MITM
>>>> as HTTPS traffic is much harder to manipulate.
>>>>
>>>> Deterministic algorithm:
>>>> For each file:
>>>>If not .svg:
>>>>  For each line:
>>>>If doesn't contain `\bxmlns\b`:
>>>>  For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`:
>>>>If both the HTTP and HTTPS versions
>>>>return 200 OK and serve the same content:
>>>>  Replace HTTP with HTTPS.
>>>
>>> OK, but it seems that some of them are disappeared :(
>>>
>>>   
http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe
>>>
>>>   -> https://www.ibm.com/developerworks/library/l-kprobes/index.html
>>
>> That looks right.
>>
>>>
>>>   http://www.redhat.com/magazine/005mar05/features/kprobes/
>>>
>>>   -> I can not find that.
>>
>> Ditto, we should drop that.
>>
>>>
>>>>   - http://www-users.cs.umn.edu/~boutcher/kprobes/
>>>>   - http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 
101-115)
>>>
>>> Both are not found.
>>
>> It looks like the first link is gone, but there seems to be a copy in
>> the web archive:
>> 
https://web.archive.org/web/20061106154519/http://www-users.cs.umn.edu/~boutcher/kprobes/
>>
>> I suppose we can drop that link.
>>
>>>
>>> (OT, it seems http://www.linuxsymposium.org/ has been left from historical
>>>   Linux Symposium, we must remove it asap)
>>
>> Indeed, I think that link pointed to the Kprobes paper:
>> https://www.kernel.org/doc/ols/2006/ols2006v2-pages-109-124.pdf
> 
> Ah, there is.

> Thank you for the confirmation.
> Alexander, can you update other urls instead of just replacing the http with 
https?
Sry, but I don't steal others' work (on principle).

If I didn't do the work (e.g. search for the replacement URL), I don't 
deserve to author the respective commit.


Alexander,
Next time, please ask if you aren't sure about including others' 
suggestions -- no need to term it as "stealing". Masami asked if you can 
include this, and I shared what I thought are the correct URLs so that 
they can be included. We don't mind someone else doing this change.


Besides, there are ways to acknowledge others, through a Suggested-by 
tag, as an example.




Also my HTTPSifying task is not done yet.


Hmm, Naveen, then, can you make the update?


Sure, I will send a patch.


- Naveen



Re: [PATCH] Replace HTTP links with HTTPS ones: kprobes

2020-07-10 Thread Naveen N. Rao

Masami Hiramatsu wrote:

On Tue,  7 Jul 2020 21:49:59 +0200
"Alexander A. Klimov"  wrote:


Rationale:
Reduces attack surface on kernel devs opening the links for MITM
as HTTPS traffic is much harder to manipulate.

Deterministic algorithm:
For each file:
  If not .svg:
For each line:
  If doesn't contain `\bxmlns\b`:
For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`:
  If both the HTTP and HTTPS versions
  return 200 OK and serve the same content:
Replace HTTP with HTTPS.


OK, but it seems that some of them are disappeared :(

 
http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe

 -> https://www.ibm.com/developerworks/library/l-kprobes/index.html


That looks right.



 http://www.redhat.com/magazine/005mar05/features/kprobes/

 -> I can not find that.


Ditto, we should drop that.




 - http://www-users.cs.umn.edu/~boutcher/kprobes/
 - http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115)


Both are not found.


It looks like the first link is gone, but there seems to be a copy in 
the web archive:

https://web.archive.org/web/20061106154519/http://www-users.cs.umn.edu/~boutcher/kprobes/

I suppose we can drop that link.



(OT, it seems http://www.linuxsymposium.org/ has been left from historical
 Linux Symposium, we must remove it asap)


Indeed, I think that link pointed to the Kprobes paper:
https://www.kernel.org/doc/ols/2006/ols2006v2-pages-109-124.pdf


- Naveen



Re: [PATCH] perf powerpc: Don't ignore sym-handling.c file

2020-05-13 Thread Naveen N. Rao

Arnaldo Carvalho de Melo wrote:

Em Mon, May 11, 2020 at 11:45:09PM +0530, Sandipan Das escreveu:


On 09/05/20 4:51 pm, Ravi Bangoria wrote:
> Commit 7eec00a74720 ("perf symbols: Consolidate symbol fixup issue")
> removed powerpc specific sym-handling.c file from Build. This wasn't
> caught by build CI because all functions in this file are declared
> as __weak in common code. Fix it.
> 
> Fixes: 7eec00a74720 ("perf symbols: Consolidate symbol fixup issue")

> Reported-by: Sandipan Das 
> Signed-off-by: Ravi Bangoria 
> ---
>  tools/perf/arch/powerpc/util/Build | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build

> index e5c9504f8586..e86e210bf514 100644
> --- a/tools/perf/arch/powerpc/util/Build
> +++ b/tools/perf/arch/powerpc/util/Build
> @@ -2,6 +2,7 @@ perf-y += header.o
>  perf-y += kvm-stat.o
>  perf-y += perf_regs.o
>  perf-y += mem-events.o
> +perf-y += sym-handling.o
>  
>  perf-$(CONFIG_DWARF) += dwarf-regs.o

>  perf-$(CONFIG_DWARF) += skip-callchain-idx.o
> 


Thanks for fixing this!

Acked-by: Sandipan Das 


Leo, Naveen, can you comment on this?


Shoot -- this is a bad miss, I should have caught it. FWIW:
Reviewed-by: Naveen N. Rao 

Thanks,
Naveen


Re: [PATCH 12/14] docs: move remaining stuff under Documentation/*.txt to Documentation/staging

2020-05-04 Thread Naveen N. Rao

Masami Hiramatsu wrote:

On Fri,  1 May 2020 17:37:56 +0200
Mauro Carvalho Chehab  wrote:


There are several files for which I was unable to find a proper place,
and 3 that are still in plain old text format.

Let's place this stuff behind the carpet, as we'd like to keep the
root directory clean.

We can later discuss and move those into better places.


Hi Mauro,

Thanks for cleaning it up! Tentatively moving kprobes.txt under
staging/ is good to me.

Acked-by: Masami Hiramatsu 

BTW, I think kprobes.txt should go under trace/, or we may be better
off making a new core-api/events/ directory and preparing it for other
event systems (PMU, uprobes, and hw_breakpoint).


I think it would be good to move kprobes.txt under trace/ -- all other 
tracing bits are already present there, including uprobes.



- Naveen



Re: [PATCH 0/2] pseries/hotplug: Change the default behaviour of cede_offline

2019-09-18 Thread Naveen N. Rao

Michael Ellerman wrote:

"Naveen N. Rao"  writes:

Michael Ellerman wrote:

"Gautham R. Shenoy"  writes:

From: "Gautham R. Shenoy" 

Currently on Pseries Linux Guests, the offlined CPU can be put to one
of the following two states:
   - Long term processor cede (also called extended cede)
   - Returned to the Hypervisor via RTAS "stop-self" call.

This is controlled by the kernel boot parameter "cede_offline=on/off".

By default the offlined CPUs enter extended cede.


Since commit 3aa565f53c39 ("powerpc/pseries: Add hooks to put the CPU into an 
appropriate offline state") (Nov 2009)

Which you wrote :)

Why was that wrong?


The PHYP hypervisor considers CPUs in extended cede to be "active"
since the CPUs are still under the control of the Linux Guests. Hence, when we 
change the
SMT modes by offlining the secondary CPUs, the PURR and the RWMR SPRs
will continue to count the values for offlined CPUs in extended cede
as if they are online.

One of the expectations with PURR is that, for an interval of time,
the sum of the PURR increments across the online CPUs of a core should
equal the number of timebase ticks for that interval.

This is currently not the case.


But why does that matter? It's just some accounting stuff, does it
actually break something meaningful?


Yes, this broke lparstat at the very least (though it's quite unfortunate 
we took so long to notice).


By "so long" you mean 10 years?

Also I've never heard of lparstat, but I assume it's one of these tools
that's meant to behave like the AIX equivalent?


Yes, and yes. lparstat is part of powerpc-utils.



If it's been "broken" for 10 years and no one noticed, I'd argue the
current behaviour is now "correct" and fixing it would actually be a
breakage :)


:)
More on this below...




With SMT disabled, and under load:
  $ sudo lparstat 1 10

  System Configuration
  type=Shared mode=Uncapped smt=Off lcpu=2 mem=7759616 kB cpus=6 ent=1.00 


  %user  %sys %wait%idlephysc %entc lbusy  vcsw phint
  - - --- - - - -
  100.00  0.00  0.00 0.00 1.10 110.00 100.00 128784460 0
  100.00  0.00  0.00 0.00 1.07 107.00 100.00 128784860 0
  100.00  0.00  0.00 0.00 1.07 107.00 100.00 128785260 0
  100.00  0.00  0.00 0.00 1.07 107.00 100.00 128785662 0
  100.00  0.00  0.00 0.00 1.07 107.00 100.00 128786062 0
  100.00  0.00  0.00 0.00 1.07 107.00 100.00 128786462 0
  100.00  0.00  0.00 0.00 1.07 107.00 100.00 128786862 0
  100.00  0.00  0.00 0.00 1.07 107.00 100.00 128787262 0
  100.00  0.00  0.00 0.00 1.07 107.00 100.00 128787664 0
  100.00  0.00  0.00 0.00 1.07 107.00 100.00 128788064 0


What about that is wrong?


The 'physc' column represents cpu usage in units of physical cores.  
With 2 virtual cores ('lcpu=2') in uncapped, shared processor mode, we 
expect this to be closer to 2 when fully loaded (and there is spare 
capacity elsewhere in the system).





With cede_offline=off:
  $ sudo lparstat 1 10

  System Configuration
  type=Shared mode=Uncapped smt=Off lcpu=2 mem=7759616 kB cpus=6 ent=1.00 


  %user  %sys %wait%idlephysc %entc lbusy  vcsw phint
  - - --- - - - -
  100.00  0.00  0.00 0.00 1.94 194.00 100.00 128961588 0
  100.00  0.00  0.00 0.00 1.91 191.00 100.00 128961988 0
  100.00  0.00  0.00 0.00  inf   inf 100.00 128962392 0
  100.00  0.00  0.00 0.00 1.91 191.00 100.00 128962792 0
  100.00  0.00  0.00 0.00 1.91 191.00 100.00 128963192 0
  100.00  0.00  0.00 0.00 1.91 191.00 100.00 128963592 0
  100.00  0.00  0.00 0.00 1.91 191.00 100.00 128963992 0
  100.00  0.00  0.00 0.00 1.91 191.00 100.00 128964392 0
  100.00  0.00  0.00 0.00 1.91 191.00 100.00 128964792 0
  100.00  0.00  0.00 0.00 1.91 191.00 100.00 128965194 0

[The 'inf' values there show a different bug]

Also, since we expose [S]PURR through sysfs, any tools that make use of 
that directly are also affected due to this.


But again if we've had the current behaviour for 10 years then arguably
that's now the correct behaviour.


That's a fair point, and probably again points to this area getting less 
tested. One of the main reasons this is being caught now, though, is 
that workloads are now being tested at lower SMT levels. So, I 
suspect no one has been relying on this behavior and we can consider 
this to be a bug.



Thanks,
Naveen



Re: [PATCH 0/2] pseries/hotplug: Change the default behaviour of cede_offline

2019-09-18 Thread Naveen N. Rao

Michael Ellerman wrote:

"Gautham R. Shenoy"  writes:

From: "Gautham R. Shenoy" 

Currently on Pseries Linux Guests, the offlined CPU can be put to one
of the following two states:
   - Long term processor cede (also called extended cede)
   - Returned to the Hypervisor via RTAS "stop-self" call.

This is controlled by the kernel boot parameter "cede_offline=on/off".

By default the offlined CPUs enter extended cede.


Since commit 3aa565f53c39 ("powerpc/pseries: Add hooks to put the CPU into an 
appropriate offline state") (Nov 2009)

Which you wrote :)

Why was that wrong?


The PHYP hypervisor considers CPUs in extended cede to be "active"
since the CPUs are still under the control of the Linux Guests. Hence, when we 
change the
SMT modes by offlining the secondary CPUs, the PURR and the RWMR SPRs
will continue to count the values for offlined CPUs in extended cede
as if they are online.

One of the expectations with PURR is that, for an interval of time,
the sum of the PURR increments across the online CPUs of a core should
equal the number of timebase ticks for that interval.

This is currently not the case.


But why does that matter? It's just some accounting stuff, does it
actually break something meaningful?


Yes, this broke lparstat at the very least (though it's quite unfortunate 
we took so long to notice).


With SMT disabled, and under load:
 $ sudo lparstat 1 10

 System Configuration
 type=Shared mode=Uncapped smt=Off lcpu=2 mem=7759616 kB cpus=6 ent=1.00 


 %user  %sys %wait%idlephysc %entc lbusy  vcsw phint
 - - --- - - - -
 100.00  0.00  0.00 0.00 1.10 110.00 100.00 128784460 0
 100.00  0.00  0.00 0.00 1.07 107.00 100.00 128784860 0
 100.00  0.00  0.00 0.00 1.07 107.00 100.00 128785260 0
 100.00  0.00  0.00 0.00 1.07 107.00 100.00 128785662 0
 100.00  0.00  0.00 0.00 1.07 107.00 100.00 128786062 0
 100.00  0.00  0.00 0.00 1.07 107.00 100.00 128786462 0
 100.00  0.00  0.00 0.00 1.07 107.00 100.00 128786862 0
 100.00  0.00  0.00 0.00 1.07 107.00 100.00 128787262 0
 100.00  0.00  0.00 0.00 1.07 107.00 100.00 128787664 0
 100.00  0.00  0.00 0.00 1.07 107.00 100.00 128788064 0


With cede_offline=off:
 $ sudo lparstat 1 10

 System Configuration
 type=Shared mode=Uncapped smt=Off lcpu=2 mem=7759616 kB cpus=6 ent=1.00 


 %user  %sys %wait%idlephysc %entc lbusy  vcsw phint
 - - --- - - - -
 100.00  0.00  0.00 0.00 1.94 194.00 100.00 128961588 0
 100.00  0.00  0.00 0.00 1.91 191.00 100.00 128961988 0
 100.00  0.00  0.00 0.00  inf   inf 100.00 128962392 0
 100.00  0.00  0.00 0.00 1.91 191.00 100.00 128962792 0
 100.00  0.00  0.00 0.00 1.91 191.00 100.00 128963192 0
 100.00  0.00  0.00 0.00 1.91 191.00 100.00 128963592 0
 100.00  0.00  0.00 0.00 1.91 191.00 100.00 128963992 0
 100.00  0.00  0.00 0.00 1.91 191.00 100.00 128964392 0
 100.00  0.00  0.00 0.00 1.91 191.00 100.00 128964792 0
 100.00  0.00  0.00 0.00 1.91 191.00 100.00 128965194 0

[The 'inf' values there show a different bug]

Also, since we expose [S]PURR through sysfs, any tools that make use of 
that directly are also affected due to this.



- Naveen



Re: [PATCH 2/2] powerpc/watchpoint: Disable watchpoint hit by larx/stcx instructions

2019-09-10 Thread Naveen N. Rao

Ravi Bangoria wrote:

If a watchpoint exception is generated by larx/stcx instructions, the
reservation created by larx gets lost while handling the exception, and
thus the stcx instruction always fails. Generally these instructions are
used in a while(1) loop, for example in spinlocks. And because stcx
never succeeds, it loops forever and ultimately hangs the system.
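
To make the failure mode concrete, here is an illustrative reservation loop of
the sort being described -- not code from the kernel, and the increment is
arbitrary:

  static inline void reservation_loop(unsigned int *p)
  {
  	unsigned int tmp;

  	__asm__ __volatile__(
  "1:	lwarx	%0,0,%2\n"	/* load word and set reservation */
  "	addi	%0,%0,1\n"
  "	stwcx.	%0,0,%2\n"	/* store only if reservation still held */
  "	bne-	1b\n"		/* reservation lost: retry forever */
  	: "=&r" (tmp), "+m" (*p)
  	: "r" (p)
  	: "cr0", "memory");
  }

If the watchpoint exception fires between the lwarx and the stwcx., the
exception handling itself clears the reservation, so the stwcx. fails and the
loop never makes progress.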

Note that ptrace anyway works in one-shot mode and thus for ptrace
we don't change the behaviour. It's up to ptrace user to take care
of this.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/kernel/hw_breakpoint.c | 49 +++--
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 28ad3171bb82..9fa496a598ce 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -195,14 +195,32 @@ void thread_change_pc(struct task_struct *tsk, struct 
pt_regs *regs)
tsk->thread.last_hit_ubp = NULL;
 }
 
+static bool is_larx_stcx_instr(struct pt_regs *regs, unsigned int instr)

+{
+   int ret, type;
+   struct instruction_op op;
+
+   ret = analyse_instr(&op, regs, instr);
+   type = GETTYPE(op.type);
+   return (!ret && (type == LARX || type == STCX));
+}
+
 /*
  * Handle debug exception notifications.
  */
 static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
 unsigned long addr)
 {
-   int stepped;
-   unsigned int instr;
+   unsigned int instr = 0;
+
+   if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
+   goto fail;
+
+   if (is_larx_stcx_instr(regs, instr)) {
+   printk_ratelimited("Watchpoint: Can't emulate/single-step larx/"
+  "stcx instructions. Disabling 
watchpoint.\n");


The below WARN() uses the term 'breakpoint'. Better to use consistent 
terminology. I would rewrite the above as:

printk_ratelimited("Breakpoint hit on instruction that can't be emulated. 
"
"Breakpoint at 0x%lx will be disabled.\n", 
addr);

Otherwise:
Acked-by: Naveen N. Rao 

- Naveen


+   goto disable;
+   }
 
 	/* Do not emulate user-space instructions, instead single-step them */

if (user_mode(regs)) {
@@ -211,23 +229,22 @@ static bool stepping_handler(struct pt_regs *regs, struct 
perf_event *bp,
return false;
}
 
-	stepped = 0;

-   instr = 0;
-   if (!__get_user_inatomic(instr, (unsigned int *)regs->nip))
-   stepped = emulate_step(regs, instr);
+   if (!emulate_step(regs, instr))
+   goto fail;
 
+	return true;

+
+fail:
/*
-* emulate_step() could not execute it. We've failed in reliably
-* handling the hw-breakpoint. Unregister it and throw a warning
-* message to let the user know about it.
+* We've failed in reliably handling the hw-breakpoint. Unregister
+* it and throw a warning message to let the user know about it.
 */
-   if (!stepped) {
-   WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
-   "0x%lx will be disabled.", addr);
-   perf_event_disable_inatomic(bp);
-   return false;
-   }
-   return true;
+   WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
+   "0x%lx will be disabled.", addr);
+
+disable:
+   perf_event_disable_inatomic(bp);
+   return false;
 }
 
 int hw_breakpoint_handler(struct die_args *args)

--
2.21.0






[PATCH 3/3] powerpc: Use ftrace_graph_ret_addr() when unwinding

2019-09-05 Thread Naveen N. Rao
With support for HAVE_FUNCTION_GRAPH_RET_ADDR_PTR,
ftrace_graph_ret_addr() provides more robust unwinding when function
graph is in use. Update show_stack() to use the same.

With dump_stack() added to sysrq_sysctl_handler(), before this patch:
  root@(none):/sys/kernel/debug/tracing# cat /proc/sys/kernel/sysrq
  CPU: 0 PID: 218 Comm: cat Not tainted 5.3.0-rc7-00868-g8453ad4a078c-dirty #20
  Call Trace:
  [c000d1e13c30] [c006ab98] return_to_handler+0x0/0x40 
(dump_stack+0xe8/0x164) (unreliable)
  [c000d1e13c80] [c0145680] sysrq_sysctl_handler+0x48/0xb8
  [c000d1e13cd0] [c006ab98] return_to_handler+0x0/0x40 
(proc_sys_call_handler+0x274/0x2a0)
  [c000d1e13d60] [c006ab98] return_to_handler+0x0/0x40 
(return_to_handler+0x0/0x40)
  [c000d1e13d80] [c006ab98] return_to_handler+0x0/0x40 
(__vfs_read+0x3c/0x70)
  [c000d1e13dd0] [c006ab98] return_to_handler+0x0/0x40 
(vfs_read+0xb8/0x1b0)
  [c000d1e13e20] [c006ab98] return_to_handler+0x0/0x40 
(ksys_read+0x7c/0x140)

After this patch:
  Call Trace:
  [c000d1e33c30] [c006ab58] return_to_handler+0x0/0x40 
(dump_stack+0xe8/0x164) (unreliable)
  [c000d1e33c80] [c0145680] sysrq_sysctl_handler+0x48/0xb8
  [c000d1e33cd0] [c006ab58] return_to_handler+0x0/0x40 
(proc_sys_call_handler+0x274/0x2a0)
  [c000d1e33d60] [c006ab58] return_to_handler+0x0/0x40 
(__vfs_read+0x3c/0x70)
  [c000d1e33d80] [c006ab58] return_to_handler+0x0/0x40 
(vfs_read+0xb8/0x1b0)
  [c000d1e33dd0] [c006ab58] return_to_handler+0x0/0x40 
(ksys_read+0x7c/0x140)
  [c000d1e33e20] [c006ab58] return_to_handler+0x0/0x40 
(system_call+0x5c/0x68)

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/process.c | 19 ++-
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 24621e7e5033..f289bdd2b562 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2047,10 +2047,8 @@ void show_stack(struct task_struct *tsk, unsigned long 
*stack)
int count = 0;
int firstframe = 1;
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-   struct ftrace_ret_stack *ret_stack;
-   extern void return_to_handler(void);
-   unsigned long rth = (unsigned long)return_to_handler;
-   int curr_frame = 0;
+   unsigned long ret_addr;
+   int ftrace_idx = 0;
 #endif
 
if (tsk == NULL)
@@ -2079,15 +2077,10 @@ void show_stack(struct task_struct *tsk, unsigned long 
*stack)
if (!firstframe || ip != lr) {
printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-   if ((ip == rth) && curr_frame >= 0) {
-   ret_stack = ftrace_graph_get_ret_stack(current,
- curr_frame++);
-   if (ret_stack)
-   pr_cont(" (%pS)",
-   (void *)ret_stack->ret);
-   else
-   curr_frame = -1;
-   }
+   ret_addr = ftrace_graph_ret_addr(current,
+   &ftrace_idx, ip, stack);
+   if (ret_addr != ip)
+   pr_cont(" (%pS)", (void *)ret_addr);
 #endif
if (firstframe)
pr_cont(" (unreliable)");
-- 
2.23.0



[PATCH 2/3] powerpc/ftrace: Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR

2019-09-05 Thread Naveen N. Rao
This associates entries in the ftrace_ret_stack with corresponding stack
frames, enabling more robust stack unwinding. Also update the only user
of ftrace_graph_ret_addr() to pass the stack pointer.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/include/asm/asm-prototypes.h  | 3 ++-
 arch/powerpc/include/asm/ftrace.h  | 2 ++
 arch/powerpc/kernel/stacktrace.c   | 2 +-
 arch/powerpc/kernel/trace/ftrace.c | 5 +++--
 arch/powerpc/kernel/trace/ftrace_32.S  | 1 +
 arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 1 +
 arch/powerpc/kernel/trace/ftrace_64_pg.S   | 1 +
 7 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h 
b/arch/powerpc/include/asm/asm-prototypes.h
index 49196d35e3bb..8561498e653c 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -134,7 +134,8 @@ extern int __ucmpdi2(u64, u64);
 
 /* tracing */
 void _mcount(void);
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+   unsigned long sp);
 
 void pnv_power9_force_smt4_catch(void);
 void pnv_power9_force_smt4_release(void);
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 3dfb80b86561..f54a08a2cd70 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -8,6 +8,8 @@
 #define MCOUNT_ADDR((unsigned long)(_mcount))
 #define MCOUNT_INSN_SIZE   4 /* sizeof mcount call */
 
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
 #ifdef __ASSEMBLY__
 
 /* Based off of objdump optput from glibc */
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 1e2276963f6d..e2a46cfed5fd 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -182,7 +182,7 @@ static int __save_stack_trace_tsk_reliable(struct 
task_struct *tsk,
 * FIXME: IMHO these tests do not belong in
 * arch-dependent code, they are generic.
 */
-   ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL);
+   ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
 #ifdef CONFIG_KPROBES
/*
 * Mark stacktraces with kretprobed functions on them
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index be1ca98fce5c..7ea0ca044b65 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -944,7 +944,8 @@ int ftrace_disable_ftrace_graph_caller(void)
  * Hook the return address and push it in the stack of return addrs
  * in current thread info. Return the address we want to divert to.
  */
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+   unsigned long sp)
 {
unsigned long return_hooker;
 
@@ -956,7 +957,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, 
unsigned long ip)
 
return_hooker = ppc_function_entry(return_to_handler);
 
-   if (!function_graph_enter(parent, ip, 0, NULL))
+   if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
parent = return_hooker;
 out:
return parent;
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S 
b/arch/powerpc/kernel/trace/ftrace_32.S
index 183f608efb81..e023ae59c429 100644
--- a/arch/powerpc/kernel/trace/ftrace_32.S
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -50,6 +50,7 @@ _GLOBAL(ftrace_stub)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 _GLOBAL(ftrace_graph_caller)
+   addi	r5, r1, 48
/* load r4 with local address */
lwz r4, 44(r1)
subir4, r4, MCOUNT_INSN_SIZE
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S 
b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 74acbf16a666..f9fd5f743eba 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -294,6 +294,7 @@ _GLOBAL(ftrace_graph_caller)
std r2, 24(r1)
ld  r2, PACATOC(r13)/* get kernel TOC in r2 */
 
+   addi	r5, r1, 112
mfctr   r4  /* ftrace_caller has moved local addr here */
std r4, 40(r1)
mflr	r3  /* ftrace_caller has restored LR from stack */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S 
b/arch/powerpc/kernel/trace/ftrace_64_pg.S
index e41a7d13c99c..6708e24db0ab 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S
@@ -41,6 +41,7 @@ _GLOBAL(ftrace_stub)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 _GLOBAL(ftrace_graph_caller)
+   addi	r5, r1, 112
/* load r4 with local address */
ld  r4, 128(r1)
subir4

[PATCH 0/3] powerpc/ftrace: Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR

2019-09-05 Thread Naveen N. Rao
Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR for more robust stack unwinding 
when function graph tracer is in use. Convert powerpc show_stack() to 
use ftrace_graph_ret_addr() for better stack unwinding.

- Naveen

Naveen N. Rao (3):
  ftrace: Look up the address of return_to_handler() using helpers
  powerpc/ftrace: Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
  powerpc: Use ftrace_graph_ret_addr() when unwinding

 arch/powerpc/include/asm/asm-prototypes.h |  3 ++-
 arch/powerpc/include/asm/ftrace.h |  2 ++
 arch/powerpc/kernel/process.c | 19 ++-
 arch/powerpc/kernel/stacktrace.c  |  2 +-
 arch/powerpc/kernel/trace/ftrace.c|  5 +++--
 arch/powerpc/kernel/trace/ftrace_32.S |  1 +
 .../powerpc/kernel/trace/ftrace_64_mprofile.S |  1 +
 arch/powerpc/kernel/trace/ftrace_64_pg.S  |  1 +
 kernel/trace/fgraph.c |  4 ++--
 9 files changed, 19 insertions(+), 19 deletions(-)

-- 
2.23.0



[PATCH 1/3] ftrace: Look up the address of return_to_handler() using helpers

2019-09-05 Thread Naveen N. Rao
This ensures that we use the right address on architectures that use
function descriptors.

Signed-off-by: Naveen N. Rao 
---
 kernel/trace/fgraph.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 8dfd5021b933..7950a0356042 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -276,7 +276,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct 
*task, int *idx,
int index = task->curr_ret_stack;
int i;
 
-   if (ret != (unsigned long)return_to_handler)
+   if (ret != (unsigned 
long)dereference_kernel_function_descriptor(return_to_handler))
return ret;
 
if (index < 0)
@@ -294,7 +294,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct 
*task, int *idx,
 {
int task_idx;
 
-   if (ret != (unsigned long)return_to_handler)
+   if (ret != (unsigned 
long)dereference_kernel_function_descriptor(return_to_handler))
return ret;
 
task_idx = task->curr_ret_stack;
-- 
2.23.0



Re: [PATCH v3 2/3] Powerpc64/Watchpoint: Don't ignore extraneous exceptions

2019-09-04 Thread Naveen N. Rao

Ravi Bangoria wrote:

On Powerpc64, the watchpoint match range is double-word granular. On
a watchpoint hit, DAR is set to the first byte of overlap between the
actual access and the watched range. And thus it's quite possible that
DAR does not point inside the user-specified range. For example, say the
user creates a watchpoint with address range 0x1004 to 0x1007. So hw
would be configured to watch from 0x1000 to 0x1007. If there is a 4 byte
access from 0x1002 to 0x1005, DAR will point to 0x1002 and thus the
interrupt handler considers it extraneous, but it's actually not,
because part of the access belongs to what the user has asked for. So,
let the kernel pass it on to the user and let the user decide what to do
with it instead of silently ignoring it. The drawback is that it can
generate false positive events.


I think you should do the additional validation here, instead of 
generating false positives. You should be able to read the instruction, 
run it through analyse_instr(), and then use OP_IS_LOAD_STORE() and 
GETSIZE() to understand the access range. This can be used to then 
perform a better match against what the user asked for.
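
Something along these lines, as a rough sketch -- the analyse_instr() return
value handling and the struct field names here are from memory, so treat it as
a starting point rather than a tested patch:

  /* does the decoded access overlap the user-requested [start, end] range? */
  static bool access_in_user_range(struct pt_regs *regs, unsigned int instr,
  				 unsigned long start, unsigned long end)
  {
  	struct instruction_op op;
  	int type, size;

  	if (analyse_instr(&op, regs, instr))
  		return true;	/* could not decode: report rather than drop */

  	type = GETTYPE(op.type);
  	if (!OP_IS_LOAD_STORE(type))
  		return true;

  	size = GETSIZE(op.type);
  	return op.ea <= end && op.ea + size > start;
  }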


- Naveen



Re: [PATCH -tip v2] kprobes: Prohibit probing on BUG() and WARN() address

2019-09-04 Thread Naveen N. Rao

Masami Hiramatsu wrote:

Since BUG() and WARN() may use a trap (e.g. UD2 on x86) to
get the address where the BUG() has occurred, kprobes can not
single-step that instruction out-of-line. So prohibit
probing on such an address.

Without this fix, if someone puts a kprobe on WARN(), the
kernel will crash with an invalid opcode error instead of
outputting the warning message, because the kernel can not find
the correct bug address.

Signed-off-by: Masami Hiramatsu 
---
  Changes in v2:
   - Add find_bug() stub function for !CONFIG_GENERIC_BUG
   - Cast the p->addr to unsigned long.
---
 include/linux/bug.h |5 +
 kernel/kprobes.c|3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)


Acked-by: Naveen N. Rao 

- Naveen



Re: [PATCH 1/2] ftrace: Fix NULL pointer dereference in t_probe_next()

2019-09-04 Thread Naveen N. Rao

Steven Rostedt wrote:

On Thu,  4 Jul 2019 20:04:41 +0530
"Naveen N. Rao"  wrote:



 kernel/trace/ftrace.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7b037295a1f1..0791eafb693d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3093,6 +3093,10 @@ t_probe_next(struct seq_file *m, loff_t *pos)
hnd = &iter->probe_entry->hlist;
 
 	hash = iter->probe->ops.func_hash->filter_hash;

+
+   if (!hash)
+   return NULL;
+
size = 1 << hash->size_bits;
 
  retry:


OK, I added this, but I'm also adding this on top:


Thanks, the additional comments do make this much clearer.

Regards,
Naveen



[tip: perf/core] perf arch powerpc: Sync powerpc syscall.tbl

2019-08-29 Thread tip-bot2 for Naveen N. Rao
The following commit has been merged into the perf/core branch of tip:

Commit-ID: 0a56e0603fa13af08816d673f6f71b68cda2fb2e
Gitweb:
https://git.kernel.org/tip/0a56e0603fa13af08816d673f6f71b68cda2fb2e
Author:Naveen N. Rao 
AuthorDate:Tue, 27 Aug 2019 12:44:58 +05:30
Committer: Arnaldo Carvalho de Melo 
CommitterDate: Wed, 28 Aug 2019 10:25:38 -03:00

perf arch powerpc: Sync powerpc syscall.tbl

Copy over powerpc syscall.tbl to grab changes from the below commits:

  commit cee3536d24a1 ("powerpc: Wire up clone3 syscall")
  commit 1a271a68e030 ("arch: mark syscall number 435 reserved for clone3")
  commit 7615d9e1780e ("arch: wire-up pidfd_open()")
  commit d8076bdb56af ("uapi: Wire up the mount API syscalls on non-x86 arches 
[ver #2]")
  commit 39036cd27273 ("arch: add pidfd and io_uring syscalls everywhere")
  commit 48166e6ea47d ("y2038: add 64-bit time_t syscalls to all 32-bit 
architectures")
  commit d33c577cccd0 ("y2038: rename old time and utime syscalls")
  commit 00bf25d693e7 ("y2038: use time32 syscall names on 32-bit")
  commit 8dabe7245bbc ("y2038: syscalls: rename y2038 compat syscalls")
  commit 0d6040d46817 ("arch: add split IPC system calls where needed")

Reported-by: Nicholas Piggin 
Signed-off-by: Naveen N. Rao 
Cc: Ravi Bangoria 
Cc: linuxppc-...@lists.ozlabs.org
Link: 
http://lkml.kernel.org/r/20190827071458.19897-1-naveen.n@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 146 +---
 1 file changed, 119 insertions(+), 27 deletions(-)

diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl 
b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index db3bbb8..43f736e 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -20,7 +20,9 @@
 10 common  unlink  sys_unlink
 11 nospu   execve  sys_execve  
compat_sys_execve
 12 common  chdir   sys_chdir
-13 common  timesys_time
compat_sys_time
+13 32  timesys_time32
+13 64  timesys_time
+13 spu timesys_time
 14 common  mknod   sys_mknod
 15 common  chmod   sys_chmod
 16 common  lchown  sys_lchown
@@ -36,14 +38,17 @@
 22 spu umount  sys_ni_syscall
 23 common  setuid  sys_setuid
 24 common  getuid  sys_getuid
-25 common  stime   sys_stime   
compat_sys_stime
+25 32  stime   sys_stime32
+25 64  stime   sys_stime
+25 spu stime   sys_stime
 26 nospu   ptrace  sys_ptrace  
compat_sys_ptrace
 27 common  alarm   sys_alarm
 28 32  oldfstatsys_fstat   
sys_ni_syscall
 28 64  oldfstatsys_ni_syscall
 28 spu oldfstatsys_ni_syscall
 29 nospu   pause   sys_pause
-30 nospu   utime   sys_utime   
compat_sys_utime
+30 32  utime   sys_utime32
+30 64  utime   sys_utime
 31 common  sttysys_ni_syscall
 32 common  gttysys_ni_syscall
 33 common  access  sys_access
@@ -157,7 +162,9 @@
 121common  setdomainname   sys_setdomainname
 122common  uname   sys_newuname
 123common  modify_ldt  sys_ni_syscall
-124common  adjtimexsys_adjtimex
compat_sys_adjtimex
+12432  adjtimexsys_adjtimex_time32
+12464  adjtimexsys_adjtimex
+124spu adjtimexsys_adjtimex
 125common  mprotectsys_mprotect
 12632  sigprocmask sys_sigprocmask 
compat_sys_sigprocmask
 12664  sigprocmask sys_ni_syscall
@@ -198,8 +205,12 @@
 158common  sched_yield sys_sched_yield
 159common  sched_get_priority_max  sys_sched_get_priority_max
 160common  sched_get_priority_min  sys_sched_get_priority_min
-161common  sched_rr_get_interval   sys_sched_rr_get_interval  

[PATCH] perf arch powerpc: Sync powerpc syscall.tbl

2019-08-27 Thread Naveen N. Rao
Copy over powerpc syscall.tbl to grab changes from the below commits:
  commit cee3536d24a1 ("powerpc: Wire up clone3 syscall")
  commit 1a271a68e030 ("arch: mark syscall number 435 reserved for clone3")
  commit 7615d9e1780e ("arch: wire-up pidfd_open()")
  commit d8076bdb56af ("uapi: Wire up the mount API syscalls on non-x86 arches 
[ver #2]")
  commit 39036cd27273 ("arch: add pidfd and io_uring syscalls everywhere")
  commit 48166e6ea47d ("y2038: add 64-bit time_t syscalls to all 32-bit 
architectures")
  commit d33c577cccd0 ("y2038: rename old time and utime syscalls")
  commit 00bf25d693e7 ("y2038: use time32 syscall names on 32-bit")
  commit 8dabe7245bbc ("y2038: syscalls: rename y2038 compat syscalls")
  commit 0d6040d46817 ("arch: add split IPC system calls where needed")

Reported-by: Nicholas Piggin 
Signed-off-by: Naveen N. Rao 
---
 .../arch/powerpc/entry/syscalls/syscall.tbl   | 146 ++
 1 file changed, 119 insertions(+), 27 deletions(-)

diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl 
b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index db3bbb8744af..43f736ed47f2 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -20,7 +20,9 @@
 10 common  unlink  sys_unlink
 11 nospu   execve  sys_execve  
compat_sys_execve
 12 common  chdir   sys_chdir
-13 common  timesys_time
compat_sys_time
+13 32  timesys_time32
+13 64  timesys_time
+13 spu timesys_time
 14 common  mknod   sys_mknod
 15 common  chmod   sys_chmod
 16 common  lchown  sys_lchown
@@ -36,14 +38,17 @@
 22 spu umount  sys_ni_syscall
 23 common  setuid  sys_setuid
 24 common  getuid  sys_getuid
-25 common  stime   sys_stime   
compat_sys_stime
+25 32  stime   sys_stime32
+25 64  stime   sys_stime
+25 spu stime   sys_stime
 26 nospu   ptrace  sys_ptrace  
compat_sys_ptrace
 27 common  alarm   sys_alarm
 28 32  oldfstatsys_fstat   
sys_ni_syscall
 28 64  oldfstatsys_ni_syscall
 28 spu oldfstatsys_ni_syscall
 29 nospu   pause   sys_pause
-30 nospu   utime   sys_utime   
compat_sys_utime
+30 32  utime   sys_utime32
+30 64  utime   sys_utime
 31 common  sttysys_ni_syscall
 32 common  gttysys_ni_syscall
 33 common  access  sys_access
@@ -157,7 +162,9 @@
 121common  setdomainname   sys_setdomainname
 122common  uname   sys_newuname
 123common  modify_ldt  sys_ni_syscall
-124common  adjtimexsys_adjtimex
compat_sys_adjtimex
+12432  adjtimexsys_adjtimex_time32
+12464  adjtimexsys_adjtimex
+124spu adjtimexsys_adjtimex
 125common  mprotectsys_mprotect
 12632  sigprocmask sys_sigprocmask 
compat_sys_sigprocmask
 12664  sigprocmask sys_ni_syscall
@@ -198,8 +205,12 @@
 158common  sched_yield sys_sched_yield
 159common  sched_get_priority_max  sys_sched_get_priority_max
 160common  sched_get_priority_min  sys_sched_get_priority_min
-161common  sched_rr_get_interval   sys_sched_rr_get_interval   
compat_sys_sched_rr_get_interval
-162common  nanosleep   sys_nanosleep   
compat_sys_nanosleep
+16132  sched_rr_get_interval   sys_sched_rr_get_interval_time32
+16164  sched_rr_get_interval   sys_sched_rr_get_interval
+161spu sched_rr_get_interval   sys_sched_rr_get_interval
+16232  nanosleep   sys_nanosleep_time32
+16264  nanosleep   sys_nanosleep
+162spu nanosleep   sys_nanosleep
 163common  mremap

Re: [PATCH] bpf: handle 32-bit zext during constant blinding

2019-08-26 Thread Naveen N. Rao

Jiong Wang wrote:


Naveen N. Rao writes:


Since BPF constant blinding is performed after the verifier pass, the
ALU32 instructions inserted for doubleword immediate loads don't have a
corresponding zext instruction. This is causing a kernel oops on powerpc
and can be reproduced by running 'test_cgroup_storage' with
bpf_jit_harden=2.

Fix this by emitting BPF_ZEXT during constant blinding if
prog->aux->verifier_zext is set.

Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to analysis 
result")
Reported-by: Michael Ellerman 
Signed-off-by: Naveen N. Rao 


Thanks for the fix.

Reviewed-by: Jiong Wang 

Just two other comments during review in case I am wrong on somewhere.

  - Using verifier_zext instead of bpf_jit_needs_zext() seems better, even
    though the latter could avoid extending the function argument.

    JIT back-ends look at verifier_zext; when it is true, the zext was
    inserted by the verifier, so the JITs won't do that code-gen themselves.

    Using verifier_zext keeps JIT blinding behaving the same way as the
    verifier. Even though blinding doesn't belong to the verifier, such
    insn patching can be seen as an extension of it, so verifier_zext
    seems better than bpf_jit_needs_zext() to me.

  - JIT blinding also escapes the HI32 randomization that happens inside
    the verifier; otherwise the x86-64 regression tests should have caught
    this issue.


Jiong,
Thanks for the review.

Alexei, Daniel,
Can you please pick this up for v5.3. This is a regression and is 
causing a crash on powerpc.



- Naveen



[PATCH] bpf: handle 32-bit zext during constant blinding

2019-08-21 Thread Naveen N. Rao
Since BPF constant blinding is performed after the verifier pass, the
ALU32 instructions inserted for doubleword immediate loads don't have a
corresponding zext instruction. This is causing a kernel oops on powerpc
and can be reproduced by running 'test_cgroup_storage' with
bpf_jit_harden=2.

Fix this by emitting BPF_ZEXT during constant blinding if
prog->aux->verifier_zext is set.

Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to 
analysis result")
Reported-by: Michael Ellerman 
Signed-off-by: Naveen N. Rao 
---
Changes since RFC:
- Removed changes to ALU32 and JMP32 ops since those don't alter program 
  execution, and the verifier would have already accounted for them.  


 kernel/bpf/core.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8191a7db2777..66088a9e9b9e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -890,7 +890,8 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
 
 static int bpf_jit_blind_insn(const struct bpf_insn *from,
  const struct bpf_insn *aux,
- struct bpf_insn *to_buff)
+ struct bpf_insn *to_buff,
+ bool emit_zext)
 {
struct bpf_insn *to = to_buff;
u32 imm_rnd = get_random_int();
@@ -1005,6 +1006,8 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ 
aux[0].imm);
*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
+   if (emit_zext)
+   *to++ = BPF_ZEXT_REG(BPF_REG_AX);
*to++ = BPF_ALU64_REG(BPF_OR,  aux[0].dst_reg, BPF_REG_AX);
break;
 
@@ -1088,7 +1091,8 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog 
*prog)
insn[1].code == 0)
memcpy(aux, insn, sizeof(aux));
 
-   rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
+   rewritten = bpf_jit_blind_insn(insn, aux, insn_buff,
+   clone->aux->verifier_zext);
if (!rewritten)
continue;
 
-- 
2.22.0



Re: Regression fix for bpf in v5.3 (was Re: [RFC PATCH] bpf: handle 32-bit zext during constant blinding)

2019-08-21 Thread Naveen N. Rao

Jiong Wang wrote:


Michael Ellerman writes:


"Naveen N. Rao"  writes:

Since BPF constant blinding is performed after the verifier pass, there
are certain ALU32 instructions inserted which don't have a corresponding
zext instruction inserted after. This is causing a kernel oops on
powerpc and can be reproduced by running 'test_cgroup_storage' with
bpf_jit_harden=2.

Fix this by emitting BPF_ZEXT during constant blinding if
prog->aux->verifier_zext is set.

Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to analysis 
result")
Reported-by: Michael Ellerman 
Signed-off-by: Naveen N. Rao 
---
This approach (the location where zext is being introduced below, in 
particular) works for powerpc, but I am not entirely sure if this is 
sufficient for other architectures as well. This is broken on v5.3-rc4.


Any comment on this?


I have commented on https://marc.info/?l=linux-netdev&m=156637836024743&w=2

The fix looks correct to me on "BPF_LD | BPF_IMM | BPF_DW", but it looks
unnecessary in the two other places. It would be great if you or Naveen
could confirm it.


Jiong,
Thanks for the review. I can now see why the other two changes are not 
necessary. I will post a follow-on patch.


Thanks!
- Naveen



Re: [RFC PATCH] bpf: handle 32-bit zext during constant blinding

2019-08-21 Thread Naveen N. Rao

Naveen N. Rao wrote:

Since BPF constant blinding is performed after the verifier pass, there
are certain ALU32 instructions inserted which don't have a corresponding
zext instruction inserted after. This is causing a kernel oops on
powerpc and can be reproduced by running 'test_cgroup_storage' with
bpf_jit_harden=2.

Fix this by emitting BPF_ZEXT during constant blinding if
prog->aux->verifier_zext is set.

Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to analysis 
result")
Reported-by: Michael Ellerman 
Signed-off-by: Naveen N. Rao 
---
This approach (the location where zext is being introduced below, in 
particular) works for powerpc, but I am not entirely sure if this is 
sufficient for other architectures as well. This is broken on v5.3-rc4.


Alexei, Daniel, Jiong,
Any feedback on this?

- Naveen



Re: [PATCH 13/16] include/asm-generic: prefer __section from compiler_attributes.h

2019-08-19 Thread Naveen N. Rao

Nick Desaulniers wrote:

Reported-by: Sedat Dilek 
Suggested-by: Josh Poimboeuf 
Signed-off-by: Nick Desaulniers 
---


Acked-by: Naveen N. Rao 

- Naveen



Re: [PATCH 4/4] arm64: implement KPROBES_ON_FTRACE

2019-08-19 Thread Naveen N. Rao

Jisheng Zhang wrote:

This patch implements KPROBES_ON_FTRACE for arm64.

~ # mount -t debugfs debugfs /sys/kernel/debug/
~ # cd /sys/kernel/debug/
/sys/kernel/debug # echo 'p _do_fork' > tracing/kprobe_events

before the patch:

/sys/kernel/debug # cat kprobes/list
ff801009ff7c  k  _do_fork+0x4[DISABLED]


This looks wrong -- we should not be allowing kprobe to be registered on 
ftrace address without KPROBES_ON_FTRACE. Is _do_fork+0x4 the location 
of ftrace entry on arm64?


- Naveen



Re: [PATCH 1/4] kprobes: adjust kprobe addr for KPROBES_ON_FTRACE

2019-08-19 Thread Naveen N. Rao

Jisheng Zhang wrote:

For KPROBES_ON_FTRACE case, we need to adjust the kprobe's addr
correspondingly.

Signed-off-by: Jisheng Zhang 
---
 kernel/kprobes.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9873fc627d61..f8400753a8a9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1560,6 +1560,9 @@ int register_kprobe(struct kprobe *p)
addr = kprobe_addr(p);
if (IS_ERR(addr))
return PTR_ERR(addr);
+#ifdef CONFIG_KPROBES_ON_FTRACE
+   addr = (kprobe_opcode_t *)ftrace_call_adjust((unsigned long)addr);
+#endif
p->addr = addr;


I'm not sure what this is achieving, but looks wrong to me.

If you intend to have kprobes default to using the ftrace entry for 
probing functions, consider overriding kprobe_lookup_name() -- see the 
powerpc variant for an example.
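Purely as an illustrative sketch (this is not arm64's actual code -- the 
16-byte search window, the defaulting behaviour and the kallsyms usage 
are all assumptions on my part), an override could look roughly like:

#include <linux/kprobes.h>
#include <linux/kallsyms.h>
#include <linux/ftrace.h>

/*
 * Sketch only: resolve a symbol name to the ftrace location within the
 * function entry (if one exists), so that KPROBES_ON_FTRACE is used by
 * default for function-entry probes. Explicit offsets are left alone.
 */
kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
{
	unsigned long addr = kallsyms_lookup_name(name);
	unsigned long faddr;

	if (!addr || offset)
		return (kprobe_opcode_t *)addr;

	/* Assume the ftrace call sits within the first few instructions */
	faddr = ftrace_location_range(addr, addr + 16);

	return (kprobe_opcode_t *)(faddr ? faddr : addr);
}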



- Naveen



[RFC PATCH] bpf: handle 32-bit zext during constant blinding

2019-08-13 Thread Naveen N. Rao
Since BPF constant blinding is performed after the verifier pass, there
are certain ALU32 instructions inserted which don't have a corresponding
zext instruction inserted after. This is causing a kernel oops on
powerpc and can be reproduced by running 'test_cgroup_storage' with
bpf_jit_harden=2.

Fix this by emitting BPF_ZEXT during constant blinding if
prog->aux->verifier_zext is set.

Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to 
analysis result")
Reported-by: Michael Ellerman 
Signed-off-by: Naveen N. Rao 
---
This approach (the location where zext is being introduced below, in 
particular) works for powerpc, but I am not entirely sure if this is 
sufficient for other architectures as well. This is broken on v5.3-rc4.

- Naveen


 kernel/bpf/core.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 8191a7db2777..d84146e6fd9e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -890,7 +890,8 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog,
 
 static int bpf_jit_blind_insn(const struct bpf_insn *from,
  const struct bpf_insn *aux,
- struct bpf_insn *to_buff)
+ struct bpf_insn *to_buff,
+ bool emit_zext)
 {
struct bpf_insn *to = to_buff;
u32 imm_rnd = get_random_int();
@@ -939,6 +940,8 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
*to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX);
+   if (emit_zext)
+   *to++ = BPF_ZEXT_REG(from->dst_reg);
break;
 
case BPF_ALU64 | BPF_ADD | BPF_K:
@@ -992,6 +995,10 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
off -= 2;
*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
+   if (emit_zext) {
+   *to++ = BPF_ZEXT_REG(BPF_REG_AX);
+   off--;
+   }
*to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
  off);
break;
@@ -1005,6 +1012,8 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */
*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ 
aux[0].imm);
*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
+   if (emit_zext)
+   *to++ = BPF_ZEXT_REG(BPF_REG_AX);
*to++ = BPF_ALU64_REG(BPF_OR,  aux[0].dst_reg, BPF_REG_AX);
break;
 
@@ -1088,7 +1097,8 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog 
*prog)
insn[1].code == 0)
memcpy(aux, insn, sizeof(aux));
 
-   rewritten = bpf_jit_blind_insn(insn, aux, insn_buff);
+   rewritten = bpf_jit_blind_insn(insn, aux, insn_buff,
+   clone->aux->verifier_zext);
if (!rewritten)
continue;
 
-- 
2.22.0



Re: [PATCH 0/2] ftrace: two fixes with func_probes handling

2019-08-08 Thread Naveen N. Rao

Naveen N. Rao wrote:
Two patches addressing bugs in ftrace function probe handling. The first 
patch addresses a NULL pointer dereference reported by LTP tests, while 
the second one is a trivial patch to address a missing check for return 
value, found by code inspection.


Steven,
Can you please take a look at these patches?

Thanks,
Naveen



[PATCH 2/2] ftrace: Check for successful allocation of hash

2019-07-04 Thread Naveen N. Rao
In register_ftrace_function_probe(), we are not checking the return
value of alloc_and_copy_ftrace_hash(). The subsequent call to
ftrace_match_records() may end up dereferencing the same. Add a check to
ensure this doesn't happen.

Signed-off-by: Naveen N. Rao 
---
 kernel/trace/ftrace.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0791eafb693d..0d5f7d4a4936 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4328,6 +4328,11 @@ register_ftrace_function_probe(char *glob, struct 
trace_array *tr,
old_hash = *orig_hash;
hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash);
 
+   if (!hash) {
+   ret = -ENOMEM;
+   goto out;
+   }
+
ret = ftrace_match_records(hash, glob, strlen(glob));
 
/* Nothing found? */
-- 
2.22.0



[PATCH 1/2] ftrace: Fix NULL pointer dereference in t_probe_next()

2019-07-04 Thread Naveen N. Rao
LTP testsuite on powerpc results in the below crash:

  Unable to handle kernel paging request for data at address 0x
  Faulting instruction address: 0xc029d800
  Oops: Kernel access of bad area, sig: 11 [#1]
  LE SMP NR_CPUS=2048 NUMA PowerNV
  ...
  CPU: 68 PID: 96584 Comm: cat Kdump: loaded Tainted: GW
  NIP:  c029d800 LR: c029dac4 CTR: c01e6ad0
  REGS: c0002017fae8ba10 TRAP: 0300   Tainted: GW
  MSR:  90009033   CR: 28022422  XER: 2004
  CFAR: c029d90c DAR:  DSISR: 4000 IRQMASK: 0
  ...
  NIP [c029d800] t_probe_next+0x60/0x180
  LR [c029dac4] t_mod_start+0x1a4/0x1f0
  Call Trace:
  [c0002017fae8bc90] [c0cdbc40] _cond_resched+0x10/0xb0 (unreliable)
  [c0002017fae8bce0] [c02a15b0] t_start+0xf0/0x1c0
  [c0002017fae8bd30] [c04ec2b4] seq_read+0x184/0x640
  [c0002017fae8bdd0] [c04a57bc] sys_read+0x10c/0x300
  [c0002017fae8be30] [c000b388] system_call+0x5c/0x70

The test (ftrace_set_ftrace_filter.sh) is part of ftrace stress tests
and the crash happens when the test does 'cat
$TRACING_PATH/set_ftrace_filter'.

The address points to the second line below, in t_probe_next(), where
filter_hash is dereferenced:
  hash = iter->probe->ops.func_hash->filter_hash;
  size = 1 << hash->size_bits;

This happens due to a race with register_ftrace_function_probe(). A new
ftrace_func_probe is created and added into the func_probes list in
trace_array under ftrace_lock. However, before initializing the filter,
we drop ftrace_lock, and re-acquire it after acquiring regex_lock. If
another process is trying to read set_ftrace_filter, it will be able to
acquire ftrace_lock during this window and it will end up seeing a NULL
filter_hash.

Fix this by just checking for a NULL filter_hash in t_probe_next(). If
the filter_hash is NULL, then this probe is just being added and we can
simply return from here.

Signed-off-by: Naveen N. Rao 
---
 kernel/trace/ftrace.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7b037295a1f1..0791eafb693d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3093,6 +3093,10 @@ t_probe_next(struct seq_file *m, loff_t *pos)
	hnd = &iter->probe_entry->hlist;
 
hash = iter->probe->ops.func_hash->filter_hash;
+
+   if (!hash)
+   return NULL;
+
size = 1 << hash->size_bits;
 
  retry:
-- 
2.22.0



[PATCH 0/2] ftrace: two fixes with func_probes handling

2019-07-04 Thread Naveen N. Rao
Two patches addressing bugs in ftrace function probe handling. The first 
patch addresses a NULL pointer dereference reported by LTP tests, while 
the second one is a trivial patch to address a missing check for return 
value, found by code inspection.

- Naveen


Naveen N. Rao (2):
  ftrace: Fix NULL pointer dereference in t_probe_next()
  ftrace: Check for successful allocation of hash

 kernel/trace/ftrace.c | 9 +
 1 file changed, 9 insertions(+)

-- 
2.22.0



Re: [PATCH v2 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-07-01 Thread Naveen N. Rao

Steven Rostedt wrote:

On Thu, 27 Jun 2019 20:58:20 +0530
"Naveen N. Rao"  wrote:



> But interesting, I don't see a synchronize_rcu_tasks() call
> there.  

We felt we don't need it in this case. We patch the branch to ftrace 
with a nop first. Other cpus should see that first. But, now that I 
think about it, should we add a memory barrier to ensure the writes get 
ordered properly?


Do you send an IPI to the other CPUs? I would, just to be safe.







We are handling this through ftrace_replace_code() and 
__ftrace_make_call_prep() below. For FTRACE_UPDATE_MAKE_CALL, we patch 
in the mflr, followed by smp_call_function(isync) and 
synchronize_rcu_tasks() before we proceed to patch the branch to ftrace.


I don't see any other scenario where we end up in 
__ftrace_make_nop_kernel() without going through ftrace_replace_code().  
For kernel modules, this can happen during module load/init and so, I 
patch out both instructions in __ftrace_make_call() above without any 
synchronization.


Am I missing anything?



No, I think I got confused ;-), it's the patch out that I was worried
about, but when I was going through the scenario, I somehow turned it
into the patching in (which I already audited :-p). I was going to
reply with just the top part of this email, but then the confusion
started :-/

OK, yes, patching out should be fine, and you already covered the
patching in. Sorry for the noise.

Just to confirm and totally remove the confusion, the patch does:

:
mflrr0 <-- preempt here
bl  _mcount

:
mflrr0
nop

And this is fine regardless.

OK, Reviewed-by: Steven Rostedt (VMware) 


Thanks for confirming! We do need an IPI to be sure, as you pointed out 
above. I will have the patching out take the same path to simplify 
things.



- Naveen




Re: [PATCH v2 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-06-27 Thread Naveen N. Rao

Hi Steven,
Thanks for the review!

Steven Rostedt wrote:

On Thu, 27 Jun 2019 16:53:52 +0530
"Naveen N. Rao"  wrote:


With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to
enable function tracing and profiling. So far, with dynamic ftrace, we
used to only patch out the branch to _mcount(). However, mflr is
executed by the branch unit that can only execute one per cycle on
POWER9 and shared with branches, so it would be nice to avoid it where
possible.

We cannot simply nop out the mflr either. When enabling function
tracing, there can be a race if tracing is enabled when some thread was
interrupted after executing a nop'ed out mflr. In this case, the thread
would execute the now-patched-in branch to _mcount() without having
executed the preceding mflr.

To solve this, we now enable function tracing in 2 steps: patch in the
mflr instruction, use 'smp_call_function(isync);
synchronize_rcu_tasks()' to ensure all existing threads make progress,
and then patch in the branch to _mcount(). We override
ftrace_replace_code() with a powerpc64 variant for this purpose.


You may want to explain that you do the reverse when patching it out.
That is, patch out the "bl _mcount" into a nop and then patch out the
"mflr r0".


Sure. I think we can add:
"When disabling function tracing, we can nop out the two instructions 
without need for any synchronization in between, as long as we nop out 
the branch to ftrace first. The lone 'mflr r0' is harmless. Finally, 
with FTRACE_UPDATE_MODIFY_CALL, no changes are needed since we are 
simply changing where the branch to ftrace goes."



But interesting, I don't see a synchronize_rcu_tasks() call
there.


We felt we don't need it in this case. We patch the branch to ftrace 
with a nop first. Other cpus should see that first. But, now that I 
think about it, should we add a memory barrier to ensure the writes get 
ordered properly?







Suggested-by: Nicholas Piggin 
Reviewed-by: Nicholas Piggin 
Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace.c | 258 ++---
 1 file changed, 236 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 517662a56bdc..86c2b50dcaa9 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -125,7 +125,7 @@ __ftrace_make_nop(struct module *mod,
 {
unsigned long entry, ptr, tramp;
unsigned long ip = rec->ip;
-   unsigned int op, pop;
+   unsigned int op;
 
 	/* read where this goes */

	if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
@@ -160,8 +160,6 @@ __ftrace_make_nop(struct module *mod,
 
 #ifdef CONFIG_MPROFILE_KERNEL

/* When using -mkernel_profile there is no load to jump over */
-   pop = PPC_INST_NOP;
-
	if (probe_kernel_read(&op, (void *)(ip - 4), 4)) {
pr_err("Fetching instruction at %lx failed.\n", ip - 4);
return -EFAULT;
@@ -169,26 +167,23 @@ __ftrace_make_nop(struct module *mod,
 
 	/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */

if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) {
-   pr_err("Unexpected instruction %08x around bl _mcount\n", op);
+   pr_err("Unexpected instruction %08x before bl _mcount\n", op);
return -EINVAL;
}
-#else
-   /*
-* Our original call site looks like:
-*
-* bl 
-* ld r2,XX(r1)
-*
-* Milton Miller pointed out that we can not simply nop the branch.
-* If a task was preempted when calling a trace function, the nops
-* will remove the way to restore the TOC in r2 and the r2 TOC will
-* get corrupted.
-*
-* Use a b +8 to jump over the load.
-*/
 
-	pop = PPC_INST_BRANCH | 8;	/* b +8 */

+   /* We should patch out the bl to _mcount first */
+   if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
+   pr_err("Patching NOP failed.\n");
+   return -EPERM;
+   }
 
+	/* then, nop out the preceding 'mflr r0' as an optimization */

+   if (op == PPC_INST_MFLR &&
+   patch_instruction((unsigned int *)(ip - 4), PPC_INST_NOP)) {
+   pr_err("Patching NOP failed.\n");
+   return -EPERM;
+   }
+#else
/*
 * Check what is in the next instruction. We can see ld r2,40(r1), but
 * on first pass after boot we will see mflr r0.
@@ -202,12 +197,25 @@ __ftrace_make_nop(struct module *mod,
pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op);
return -EINVAL;
}
-#endif /* CONFIG_MPROFILE_KERNEL */
 
-	if (patch_instruction((unsigned int *)ip, pop)) {

+   /*
+* Our original call site looks like:
+*
+* bl 
+* ld r2,XX(r1)
+ 

Re: [PATCH v2 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-06-27 Thread Naveen N. Rao

Naveen N. Rao wrote:

With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to
enable function tracing and profiling. So far, with dynamic ftrace, we
used to only patch out the branch to _mcount(). However, mflr is
executed by the branch unit that can only execute one per cycle on
POWER9 and shared with branches, so it would be nice to avoid it where
possible.

We cannot simply nop out the mflr either. When enabling function
tracing, there can be a race if tracing is enabled when some thread was
interrupted after executing a nop'ed out mflr. In this case, the thread
would execute the now-patched-in branch to _mcount() without having
executed the preceding mflr.

To solve this, we now enable function tracing in 2 steps: patch in the
mflr instruction, use 'smp_call_function(isync);
synchronize_rcu_tasks()' to ensure all existing threads make progress,
and then patch in the branch to _mcount(). We override
ftrace_replace_code() with a powerpc64 variant for this purpose.

Suggested-by: Nicholas Piggin 
Reviewed-by: Nicholas Piggin 
Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace.c | 258 ++---
 1 file changed, 236 insertions(+), 22 deletions(-)



I missed adding a comment here to explain the changes. As discussed in 
the previous series, I think we are ok with this patch from a CMODX 
perspective. For smp_call_function(), I decided to have it included in 
this patch since we know that we need it here for sure. I am not 
entirely sure we want to do that in patch_instruction() since ftrace 
doesn't seem to need it elsewhere. As Nick Piggin pointed out, we may 
want to have users of patch_instruction() (kprobes) add the necessary 
synchronization.



Thanks,
Naveen




Re: [PATCH v2 2/7] x86/ftrace: Fix use of flags in ftrace_replace_code()

2019-06-27 Thread Naveen N. Rao

Steven Rostedt wrote:

On Thu, 27 Jun 2019 16:53:50 +0530
"Naveen N. Rao"  wrote:


In commit a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be
schedulable), the generic ftrace_replace_code() function was modified to
accept a flags argument in place of a single 'enable' flag. However, the
x86 version of this function was not updated. Fix the same.

Fixes: a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be schedulable")


I don't mind this change, but it's not a bug, and I'm not sure it
should have the fixes tag. The reason being, the
FTRACE_MODIFY_ENABLE_FL is only set when ftrace is called by with the
command flag FTRACE_MAY_SLEEP, which is never done on x86.


I guess you meant to say that *FTRACE_MODIFY_MAY_SLEEP_FL* is only set 
with FTRACE_MAY_SLEEP.




That said, I'm fine with the change as it makes it more robust, but by
adding the fixes tag, you're going to get this into all the stable
code, and I'm not sure that's really necessary.


Agreed. Thanks for pointing this out. We can drop this patch from this 
series and I will re-post this as a simpler cleanup later on.



Thanks,
Naveen




Re: [PATCH v2 2/7] x86/ftrace: Fix use of flags in ftrace_replace_code()

2019-06-27 Thread Naveen N. Rao

Naveen N. Rao wrote:

In commit a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be
schedulable), the generic ftrace_replace_code() function was modified to
accept a flags argument in place of a single 'enable' flag. However, the
x86 version of this function was not updated. Fix the same.

Fixes: a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be schedulable")
Signed-off-by: Naveen N. Rao 
---
 arch/x86/kernel/ftrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


If the rest of this series is ok, and if Ingo and Steven are ok to have 
this series go through the powerpc tree, then I can re-post this 
particular patch for x86 after -rc1.


Thanks,
Naveen




[PATCH v2 6/7] kprobes/ftrace: Use ftrace_location() when [dis]arming probes

2019-06-27 Thread Naveen N. Rao
Ftrace location could include more than a single instruction in case of
some architectures (powerpc64, for now). In this case, kprobe is
permitted on any of those instructions, and uses ftrace infrastructure
for functioning.

However, [dis]arm_kprobe_ftrace() uses the kprobe address when setting
up ftrace filter IP. This won't work if the address points to any
instruction apart from the one that has a branch to _mcount(). To
resolve this, have [dis]arm_kprobe_ftrace() use ftrace_function() to
identify the filter IP.

Acked-by: Masami Hiramatsu 
Signed-off-by: Naveen N. Rao 
---
 kernel/kprobes.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 445337c107e0..282ee704e2d8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -978,10 +978,10 @@ static int prepare_kprobe(struct kprobe *p)
 /* Caller must lock kprobe_mutex */
 static int arm_kprobe_ftrace(struct kprobe *p)
 {
+   unsigned long ftrace_ip = ftrace_location((unsigned long)p->addr);
int ret = 0;
 
-   ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
-  (unsigned long)p->addr, 0, 0);
+   ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, ftrace_ip, 0, 0);
if (ret) {
pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n",
 p->addr, ret);
@@ -1005,13 +1005,14 @@ static int arm_kprobe_ftrace(struct kprobe *p)
 * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental
 * empty filter_hash which would undesirably trace all functions.
 */
-   ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0);
+   ftrace_set_filter_ip(&kprobe_ftrace_ops, ftrace_ip, 1, 0);
return ret;
 }
 
 /* Caller must lock kprobe_mutex */
 static int disarm_kprobe_ftrace(struct kprobe *p)
 {
+   unsigned long ftrace_ip = ftrace_location((unsigned long)p->addr);
int ret = 0;
 
if (kprobe_ftrace_enabled == 1) {
@@ -1022,8 +1023,7 @@ static int disarm_kprobe_ftrace(struct kprobe *p)
 
kprobe_ftrace_enabled--;
 
-   ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
-  (unsigned long)p->addr, 1, 0);
+   ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, ftrace_ip, 1, 0);
WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n",
  p->addr, ret);
return ret;
-- 
2.22.0



[PATCH v2 5/7] ftrace: Update ftrace_location() for powerpc -mprofile-kernel

2019-06-27 Thread Naveen N. Rao
Now that we are patching the preceding 'mflr r0' instruction with
-mprofile-kernel, we need to update ftrace_location() to recognise that
as being part of ftrace.

To do this, we introduce FTRACE_IP_EXTENSION to denote the length (in
bytes) of the mcount caller. By default, this is set to 0. For powerpc
with CONFIG_MPROFILE_KERNEL, we set this to MCOUNT_INSN_SIZE so that
this works if ftrace_location() is called with the address of the actual
ftrace entry call, or with the address of the preceding 'mflr r0'
instruction.

Note that we do not check if the preceding instruction is indeed the
'mflr r0'. Earlier -mprofile-kernel ABI included a 'std r0,stack'
instruction between the 'mflr r0' and the 'bl _mcount'. This is harmless
as the 'std r0,stack' instruction is inconsequential and is not relied
upon.

Suggested-by: Steven Rostedt (VMware) 
Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/include/asm/ftrace.h | 8 
 include/linux/ftrace.h| 9 +
 kernel/trace/ftrace.c | 2 +-
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 3dfb80b86561..510a8ac8ac8d 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -61,6 +61,14 @@ struct dyn_arch_ftrace {
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 #define ARCH_SUPPORTS_FTRACE_OPS 1
+
+#ifdef CONFIG_MPROFILE_KERNEL
+/*
+ * We consider two instructions -- 'mflr r0', 'bl _mcount' -- to be part
+ * of ftrace with -mprofile-kernel
+ */
+#define FTRACE_IP_EXTENSIONMCOUNT_INSN_SIZE
+#endif
 #endif
 #endif /* CONFIG_FUNCTION_TRACER */
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fa653a561da5..310b514438cb 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -29,6 +29,15 @@
 #define ARCH_SUPPORTS_FTRACE_OPS 0
 #endif
 
+/*
+ * This denotes the number of instructions (in bytes) that is used by the
+ * arch mcount caller. All instructions in this range will be owned by
+ * ftrace.
+ */
+#ifndef FTRACE_IP_EXTENSION
+#define FTRACE_IP_EXTENSION 0
+#endif
+
 /*
  * If the arch's mcount caller does not support all of ftrace's
  * features, then it must call an indirect function that
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 21d8e201ee80..308555925b81 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1575,7 +1575,7 @@ unsigned long ftrace_location_range(unsigned long start, 
unsigned long end)
  */
 unsigned long ftrace_location(unsigned long ip)
 {
-   return ftrace_location_range(ip, ip);
+   return ftrace_location_range(ip, ip + FTRACE_IP_EXTENSION);
 }
 
 /**
-- 
2.22.0



[PATCH v2 2/7] x86/ftrace: Fix use of flags in ftrace_replace_code()

2019-06-27 Thread Naveen N. Rao
In commit a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be
schedulable), the generic ftrace_replace_code() function was modified to
accept a flags argument in place of a single 'enable' flag. However, the
x86 version of this function was not updated. Fix the same.

Fixes: a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be schedulable")
Signed-off-by: Naveen N. Rao 
---
 arch/x86/kernel/ftrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 0927bb158ffc..f34005a17051 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -573,8 +573,9 @@ static void run_sync(void)
local_irq_disable();
 }
 
-void ftrace_replace_code(int enable)
+void ftrace_replace_code(int mod_flags)
 {
+   int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
struct ftrace_rec_iter *iter;
struct dyn_ftrace *rec;
const char *report = "adding breakpoints";
-- 
2.22.0



[PATCH v2 0/7] powerpc/ftrace: Patch out -mprofile-kernel instructions

2019-06-27 Thread Naveen N. Rao
Changes since v1 
(https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=114556):
- Patches 1,2,3 and 6: No changes
- Patch 4: Add smp_call_function() to flush icache on all cpus after 
  patching in the 'mflr r0' instruction.
- Patch 5: Changes as per Steven Rostedt's suggestions.
- Patch 7: Changes as per Masami and Joe Perches.

--
On powerpc64, -mprofile-kernel results in two instructions being 
emitted: 'mflr r0' and 'bl _mcount'. So far, we were only nop'ing out 
the branch to _mcount(). This series implements an approach to also nop 
out the preceding mflr.
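
To make the ordering concrete, below is a simplified sketch of the 
enable path. The helper names, the bare isync callback and the 
bl_ip/tramp parameters are illustrative only; the real implementation 
is the ftrace_replace_code() override in patch 4/7.

#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <asm/code-patching.h>
#include <asm/ppc-opcode.h>

static void do_isync(void *unused)
{
	asm volatile("isync" : : : "memory");
}

/* bl_ip: address of the 'bl _mcount' site; tramp: ftrace trampoline */
static int sketch_make_ftrace_call(unsigned long bl_ip, unsigned long tramp)
{
	unsigned int branch;

	/* Step 1: patch the preceding 'mflr r0' back in */
	if (patch_instruction((unsigned int *)(bl_ip - 4), PPC_INST_MFLR))
		return -EPERM;

	/*
	 * Make sure every CPU, and every preempted task, has seen the
	 * mflr before the call is enabled (local CPU omitted for brevity).
	 */
	smp_call_function(do_isync, NULL, 1);
	synchronize_rcu_tasks();

	/* Step 2: only now patch in the branch to the ftrace trampoline */
	branch = create_branch((unsigned int *)bl_ip, tramp, BRANCH_SET_LINK);
	if (!branch)
		return -EINVAL;

	return patch_instruction((unsigned int *)bl_ip, branch);
}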


- Naveen


Naveen N. Rao (7):
  ftrace: Expose flags used for ftrace_replace_code()
  x86/ftrace: Fix use of flags in ftrace_replace_code()
  ftrace: Expose __ftrace_replace_code()
  powerpc/ftrace: Additionally nop out the preceding mflr with
-mprofile-kernel
  ftrace: Update ftrace_location() for powerpc -mprofile-kernel
  kprobes/ftrace: Use ftrace_location() when [dis]arming probes
  powerpc/kprobes: Allow probing on any ftrace address

 arch/powerpc/include/asm/ftrace.h|   8 +
 arch/powerpc/kernel/kprobes-ftrace.c |  32 +++-
 arch/powerpc/kernel/trace/ftrace.c   | 258 ---
 arch/x86/kernel/ftrace.c |   3 +-
 include/linux/ftrace.h   |  15 ++
 kernel/kprobes.c |  10 +-
 kernel/trace/ftrace.c|  15 +-
 7 files changed, 302 insertions(+), 39 deletions(-)

-- 
2.22.0



[PATCH v2 1/7] ftrace: Expose flags used for ftrace_replace_code()

2019-06-27 Thread Naveen N. Rao
Since ftrace_replace_code() is a __weak function and can be overridden,
we need to expose the flags that can be set. So, move the flags enum to
the header file.

Reviewed-by: Steven Rostedt (VMware) 
Signed-off-by: Naveen N. Rao 
---
 include/linux/ftrace.h | 5 +
 kernel/trace/ftrace.c  | 5 -
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 25e2995d4a4c..e97789c95c4e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -162,6 +162,11 @@ enum {
FTRACE_OPS_FL_TRACE_ARRAY   = 1 << 15,
 };
 
+enum {
+   FTRACE_MODIFY_ENABLE_FL = (1 << 0),
+   FTRACE_MODIFY_MAY_SLEEP_FL  = (1 << 1),
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* The hash used to know what functions callbacks trace */
 struct ftrace_ops_hash {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 38277af44f5c..5710a6b3edc1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -75,11 +75,6 @@
 #define INIT_OPS_HASH(opsname)
 #endif
 
-enum {
-   FTRACE_MODIFY_ENABLE_FL = (1 << 0),
-   FTRACE_MODIFY_MAY_SLEEP_FL  = (1 << 1),
-};
-
 struct ftrace_ops ftrace_list_end __read_mostly = {
.func   = ftrace_stub,
.flags  = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
-- 
2.22.0



[PATCH v2 7/7] powerpc/kprobes: Allow probing on any ftrace address

2019-06-27 Thread Naveen N. Rao
With KPROBES_ON_FTRACE, kprobe is allowed to be inserted on instructions
that branch to _mcount (referred to as ftrace location). With
-mprofile-kernel, we now include the preceding 'mflr r0' as being part
of the ftrace location.

However, by default, probing on an instruction that is not actually the
branch to _mcount() is prohibited, as that is considered to not be at an
instruction boundary. This is not the case on powerpc, so allow the same
by overriding arch_check_ftrace_location()

In addition, we update kprobe_ftrace_handler() to detect this scenario
and to pass the proper nip to the pre and post probe handlers.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/kprobes-ftrace.c | 32 +++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/kprobes-ftrace.c 
b/arch/powerpc/kernel/kprobes-ftrace.c
index 972cb28174b2..23c840748183 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -12,15 +12,35 @@
 #include 
 #include 
 
+/*
+ * With -mprofile-kernel, we patch two instructions -- the branch to _mcount
+ * as well as the preceding 'mflr r0'. Both these instructions are claimed
+ * by ftrace and we should allow probing on either instruction.
+ */
+int arch_check_ftrace_location(struct kprobe *p)
+{
+   if (ftrace_location((unsigned long)p->addr))
+   p->flags |= KPROBE_FLAG_FTRACE;
+   return 0;
+}
+
 /* Ftrace callback handler for kprobes */
 void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
   struct ftrace_ops *ops, struct pt_regs *regs)
 {
struct kprobe *p;
+   int mflr_kprobe = 0;
struct kprobe_ctlblk *kcb;
 
p = get_kprobe((kprobe_opcode_t *)nip);
-   if (unlikely(!p) || kprobe_disabled(p))
+   if (!p) {
+   p = get_kprobe((kprobe_opcode_t *)(nip - MCOUNT_INSN_SIZE));
+   if (unlikely(!p))
+   return;
+   mflr_kprobe = 1;
+   }
+
+   if (kprobe_disabled(p))
return;
 
kcb = get_kprobe_ctlblk();
@@ -33,6 +53,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long 
parent_nip,
 */
regs->nip -= MCOUNT_INSN_SIZE;
 
+   if (mflr_kprobe)
+   regs->nip -= MCOUNT_INSN_SIZE;
+
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs)) {
@@ -45,6 +68,8 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long 
parent_nip,
kcb->kprobe_status = KPROBE_HIT_SSDONE;
p->post_handler(p, regs, 0);
}
+   if (mflr_kprobe)
+   regs->nip += MCOUNT_INSN_SIZE;
}
/*
 * If pre_handler returns !0, it changes regs->nip. We have to
@@ -57,6 +82,11 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler);
 
 int arch_prepare_kprobe_ftrace(struct kprobe *p)
 {
+   if ((unsigned long)p->addr & 0x03) {
+   pr_err("Attempt to register kprobe at an unaligned address\n");
+   return -EILSEQ;
+   }
+
p->ainsn.insn = NULL;
p->ainsn.boostable = -1;
return 0;
-- 
2.22.0



[PATCH v2 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-06-27 Thread Naveen N. Rao
With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to
enable function tracing and profiling. So far, with dynamic ftrace, we
used to only patch out the branch to _mcount(). However, mflr is
executed by the branch unit that can only execute one per cycle on
POWER9 and shared with branches, so it would be nice to avoid it where
possible.

We cannot simply nop out the mflr either. When enabling function
tracing, there can be a race if tracing is enabled when some thread was
interrupted after executing a nop'ed out mflr. In this case, the thread
would execute the now-patched-in branch to _mcount() without having
executed the preceding mflr.

To solve this, we now enable function tracing in 2 steps: patch in the
mflr instruction, use 'smp_call_function(isync);
synchronize_rcu_tasks()' to ensure all existing threads make progress,
and then patch in the branch to _mcount(). We override
ftrace_replace_code() with a powerpc64 variant for this purpose.

Suggested-by: Nicholas Piggin 
Reviewed-by: Nicholas Piggin 
Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace.c | 258 ++---
 1 file changed, 236 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 517662a56bdc..86c2b50dcaa9 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -125,7 +125,7 @@ __ftrace_make_nop(struct module *mod,
 {
unsigned long entry, ptr, tramp;
unsigned long ip = rec->ip;
-   unsigned int op, pop;
+   unsigned int op;
 
/* read where this goes */
	if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
@@ -160,8 +160,6 @@ __ftrace_make_nop(struct module *mod,
 
 #ifdef CONFIG_MPROFILE_KERNEL
/* When using -mkernel_profile there is no load to jump over */
-   pop = PPC_INST_NOP;
-
	if (probe_kernel_read(&op, (void *)(ip - 4), 4)) {
pr_err("Fetching instruction at %lx failed.\n", ip - 4);
return -EFAULT;
@@ -169,26 +167,23 @@ __ftrace_make_nop(struct module *mod,
 
/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) {
-   pr_err("Unexpected instruction %08x around bl _mcount\n", op);
+   pr_err("Unexpected instruction %08x before bl _mcount\n", op);
return -EINVAL;
}
-#else
-   /*
-* Our original call site looks like:
-*
-* bl 
-* ld r2,XX(r1)
-*
-* Milton Miller pointed out that we can not simply nop the branch.
-* If a task was preempted when calling a trace function, the nops
-* will remove the way to restore the TOC in r2 and the r2 TOC will
-* get corrupted.
-*
-* Use a b +8 to jump over the load.
-*/
 
-   pop = PPC_INST_BRANCH | 8;  /* b +8 */
+   /* We should patch out the bl to _mcount first */
+   if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
+   pr_err("Patching NOP failed.\n");
+   return -EPERM;
+   }
 
+   /* then, nop out the preceding 'mflr r0' as an optimization */
+   if (op == PPC_INST_MFLR &&
+   patch_instruction((unsigned int *)(ip - 4), PPC_INST_NOP)) {
+   pr_err("Patching NOP failed.\n");
+   return -EPERM;
+   }
+#else
/*
 * Check what is in the next instruction. We can see ld r2,40(r1), but
 * on first pass after boot we will see mflr r0.
@@ -202,12 +197,25 @@ __ftrace_make_nop(struct module *mod,
pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op);
return -EINVAL;
}
-#endif /* CONFIG_MPROFILE_KERNEL */
 
-   if (patch_instruction((unsigned int *)ip, pop)) {
+   /*
+* Our original call site looks like:
+*
+* bl 
+* ld r2,XX(r1)
+*
+* Milton Miller pointed out that we can not simply nop the branch.
+* If a task was preempted when calling a trace function, the nops
+* will remove the way to restore the TOC in r2 and the r2 TOC will
+* get corrupted.
+*
+* Use a b +8 to jump over the load.
+*/
+   if (patch_instruction((unsigned int *)ip, PPC_INST_BRANCH | 8)) {
pr_err("Patching NOP failed.\n");
return -EPERM;
}
+#endif /* CONFIG_MPROFILE_KERNEL */
 
return 0;
 }
@@ -421,6 +429,26 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace 
*rec, unsigned long addr)
return -EPERM;
}
 
+#ifdef CONFIG_MPROFILE_KERNEL
+   /* Nop out the preceding 'mflr r0' as an optimization */
+   if (probe_kernel_read(&op, (void *)(ip - 4), 4)) {
+   pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+ 

[PATCH v2 3/7] ftrace: Expose __ftrace_replace_code()

2019-06-27 Thread Naveen N. Rao
While over-riding ftrace_replace_code(), we still want to reuse the
existing __ftrace_replace_code() function. Rename the function and
make it available for other kernel code.

Signed-off-by: Naveen N. Rao 
---
 include/linux/ftrace.h | 1 +
 kernel/trace/ftrace.c  | 8 
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e97789c95c4e..fa653a561da5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -456,6 +456,7 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, 
int enable);
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void);
+extern int ftrace_replace_code_rec(struct dyn_ftrace *rec, int enable);
 extern void ftrace_replace_code(int enable);
 extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5710a6b3edc1..21d8e201ee80 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2351,8 +2351,8 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
return (unsigned long)FTRACE_ADDR;
 }
 
-static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+int
+ftrace_replace_code_rec(struct dyn_ftrace *rec, int enable)
 {
unsigned long ftrace_old_addr;
unsigned long ftrace_addr;
@@ -2403,7 +2403,7 @@ void __weak ftrace_replace_code(int mod_flags)
if (rec->flags & FTRACE_FL_DISABLED)
continue;
 
-   failed = __ftrace_replace_code(rec, enable);
+   failed = ftrace_replace_code_rec(rec, enable);
if (failed) {
ftrace_bug(failed, rec);
/* Stop processing */
@@ -5827,7 +5827,7 @@ void ftrace_module_enable(struct module *mod)
rec->flags = cnt;
 
if (ftrace_start_up && cnt) {
-   int failed = __ftrace_replace_code(rec, 1);
+   int failed = ftrace_replace_code_rec(rec, 1);
if (failed) {
ftrace_bug(failed, rec);
goto out_loop;
-- 
2.22.0



[PATCH] recordmcount: Fix spurious mcount entries on powerpc

2019-06-26 Thread Naveen N. Rao
The recent change enabling HAVE_C_RECORDMCOUNT on powerpc started
showing the following issue:

  # modprobe kprobe_example
   ftrace-powerpc: Not expected bl: opcode is 3c4c0001
   WARNING: CPU: 0 PID: 227 at kernel/trace/ftrace.c:2001 ftrace_bug+0x90/0x318
   Modules linked in:
   CPU: 0 PID: 227 Comm: modprobe Not tainted 5.2.0-rc6-00678-g1c329100b942 #2
   NIP:  c0264318 LR: c025d694 CTR: c0f5cd30
   REGS: c1f2b7b0 TRAP: 0700   Not tainted  
(5.2.0-rc6-00678-g1c329100b942)
   MSR:  90010282b033   CR: 
28228222  XER: 
   CFAR: c02642fc IRQMASK: 0
   
   NIP [c0264318] ftrace_bug+0x90/0x318
   LR [c025d694] ftrace_process_locs+0x4f4/0x5e0
   Call Trace:
   [c1f2ba40] [0004] 0x4 (unreliable)
   [c1f2bad0] [c025d694] ftrace_process_locs+0x4f4/0x5e0
   [c1f2bb90] [c020ff10] load_module+0x25b0/0x30c0
   [c1f2bd00] [c0210cb0] sys_finit_module+0xc0/0x130
   [c1f2be20] [c000bda4] system_call+0x5c/0x70
   Instruction dump:
   419e0018 2f83 419e00bc 2f83ffea 409e00cc 481c 0fe0 3c62ff96
   3901 3940 386386d0 48c4 <0fe0> 3ce20003 3901 3c62ff96
   ---[ end trace 4c438d5cebf78381 ]---
   ftrace failed to modify
   [] 0xc008012a0008
actual:   01:00:4c:3c
   Initializing ftrace call sites
   ftrace record flags: 200
(0)
expected tramp: c006af4c

Looking at the relocation records in __mcount_loc showed a few spurious
entries:
  RELOCATION RECORDS FOR [__mcount_loc]:
  OFFSET   TYPE            VALUE
           R_PPC64_ADDR64  .text.unlikely+0x0008
  0008     R_PPC64_ADDR64  .text.unlikely+0x0014
  0010     R_PPC64_ADDR64  .text.unlikely+0x0060
  0018     R_PPC64_ADDR64  .text.unlikely+0x00b4
  0020     R_PPC64_ADDR64  .init.text+0x0008
  0028     R_PPC64_ADDR64  .init.text+0x0014

The first entry in each section is incorrect. Looking at the relocation
records, the spurious entries correspond to the R_PPC64_ENTRY records:
  RELOCATION RECORDS FOR [.text.unlikely]:
  OFFSET   TYPE  VALUE
   R_PPC64_REL64 .TOC.-0x0008
  0008 R_PPC64_ENTRY *ABS*
  0014 R_PPC64_REL24 _mcount
  

The problem is that we are not validating the return value from
get_mcountsym() in sift_rel_mcount(). With this entry, mcountsym is 0,
but Elf_r_sym(relp) also ends up being 0. Fix this by ensuring mcountsym
is valid before processing the entry.

Fixes: c7d64b560ce80 ("powerpc/ftrace: Enable C Version of recordmcount")
Signed-off-by: Naveen N. Rao 
---
 scripts/recordmcount.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
index 13c5e6c8829c..47fca2c69a73 100644
--- a/scripts/recordmcount.h
+++ b/scripts/recordmcount.h
@@ -325,7 +325,8 @@ static uint_t *sift_rel_mcount(uint_t *mlocp,
if (!mcountsym)
mcountsym = get_mcountsym(sym0, relp, str0);
 
-   if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) {
+   if (mcountsym && mcountsym == Elf_r_sym(relp) &&
+   !is_fake_mcount(relp)) {
uint_t const addend =
_w(_w(relp->r_offset) - recval + mcount_adjust);
mrelp->r_offset = _w(offbase
-- 
2.22.0



Re: [PATCH 7/7] powerpc/kprobes: Allow probing on any ftrace address

2019-06-26 Thread Naveen N. Rao

Masami Hiramatsu wrote:

On Tue, 18 Jun 2019 20:17:06 +0530
"Naveen N. Rao"  wrote:


With KPROBES_ON_FTRACE, kprobe is allowed to be inserted on instructions
that branch to _mcount (referred to as ftrace location). With
-mprofile-kernel, we now include the preceding 'mflr r0' as being part
of the ftrace location.

However, by default, probing on an instruction that is not actually the
branch to _mcount() is prohibited, as that is considered to not be at an
instruction boundary. This is not the case on powerpc, so allow the same
by overriding arch_check_ftrace_location()

In addition, we update kprobe_ftrace_handler() to detect this scenario
and to pass the proper nip to the pre and post probe handlers.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/kprobes-ftrace.c | 30 
 1 file changed, 30 insertions(+)

diff --git a/arch/powerpc/kernel/kprobes-ftrace.c 
b/arch/powerpc/kernel/kprobes-ftrace.c
index 972cb28174b2..6a0bd3c16cb6 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -12,14 +12,34 @@
 #include 
 #include 
 
+/*

+ * With -mprofile-kernel, we patch two instructions -- the branch to _mcount
+ * as well as the preceding 'mflr r0'. Both these instructions are claimed
+ * by ftrace and we should allow probing on either instruction.
+ */
+int arch_check_ftrace_location(struct kprobe *p)
+{
+   if (ftrace_location((unsigned long)p->addr))
+   p->flags |= KPROBE_FLAG_FTRACE;
+   return 0;
+}
+
 /* Ftrace callback handler for kprobes */
 void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
   struct ftrace_ops *ops, struct pt_regs *regs)
 {
struct kprobe *p;
+   int mflr_kprobe = 0;
struct kprobe_ctlblk *kcb;
 
 	p = get_kprobe((kprobe_opcode_t *)nip);

+   if (unlikely(!p)) {


Hmm, is this really unlikely? If we put a kprobe on the second instruction 
address,
we will see p == NULL always.


+   p = get_kprobe((kprobe_opcode_t *)(nip - MCOUNT_INSN_SIZE));
+   if (!p)


Here, unlikely() makes sense, because we could not find a kprobe at either
nip or nip - MCOUNT_INSN_SIZE.


+   return;
+   mflr_kprobe = 1;
+   }
+
if (unlikely(!p) || kprobe_disabled(p))


"unlikely(!p)" is not needed here.


...

Joe Perches wrote:

On Fri, 2019-06-21 at 23:50 +0900, Masami Hiramatsu wrote:

On Tue, 18 Jun 2019 20:17:06 +0530
"Naveen N. Rao"  wrote:


trivia:


> diff --git a/arch/powerpc/kernel/kprobes-ftrace.c 
b/arch/powerpc/kernel/kprobes-ftrace.c

[]

> @@ -57,6 +82,11 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler);
>  
>  int arch_prepare_kprobe_ftrace(struct kprobe *p)

>  {
> +  if ((unsigned long)p->addr & 0x03) {
> +  printk("Attempt to register kprobe at an unaligned address\n");


Please use the appropriate KERN_ or pr_



All good points. Thanks for the review.


- Naveen




Re: [PATCH 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-06-19 Thread Naveen N. Rao

Nicholas Piggin wrote:

Naveen N. Rao's on June 19, 2019 7:53 pm:

Nicholas Piggin wrote:

Michael Ellerman's on June 19, 2019 3:14 pm:


I'm also not convinced the ordering between the two patches is
guaranteed by the ISA, given that there's possibly no isync on the other
CPU.


Will they go through a context synchronizing event?

synchronize_rcu_tasks() should ensure a thread is scheduled away, but
I'm not actually sure it guarantees CSI if it's kernel->kernel. Could
do a smp_call_function to do the isync on each CPU to be sure.


Good point. Per 
Documentation/RCU/Design/Requirements/Requirements.html#Tasks RCU:
"The solution, in the form of Tasks RCU, is to have implicit read-side 
critical sections that are delimited by voluntary context switches, that 
is, calls to schedule(), cond_resched(), and synchronize_rcu_tasks(). In 
addition, transitions to and from userspace execution also delimit 
tasks-RCU read-side critical sections."


I suppose transitions to/from userspace, as well as calls to schedule() 
result in context synchronizing instruction being executed. But, if some 
tasks call cond_resched() and synchronize_rcu_tasks(), we probably won't 
have a CSI executed.


Also:
"In CONFIG_PREEMPT=n kernels, trampolines cannot be preempted, so these 
APIs map to call_rcu(), synchronize_rcu(), and rcu_barrier(), 
respectively."


In this scenario as well, I think we won't have a CSI executed in case 
of cond_resched().


Should we enhance patch_instruction() to handle that?


Well, not sure. Do we have many post-boot callers of it? Should
they take care of their own synchronization requirements?


Kprobes and ftrace are the two users (along with anything else that may 
use jump labels).


Looking at this from the CMODX perspective: the main example quoted of 
an erratic behavior is when any variant of the patched instruction 
causes an exception.


With ftrace, I think we are ok since we only ever patch a 'nop' or a 
'bl' (and the 'mflr' now), none of which should cause an exception. As 
such, the existing patch_instruction() should suffice.


However, with kprobes, we patch a 'trap' (or a branch in case of 
optprobes) on most instructions. I wonder if we should be issuing an 
'isync' on all cpus in this case. Or, even if that is sufficient or 
necessary.
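
As an illustration of the smp_call_function() approach Nick mentions
above, a broadcast isync could be done along these lines (a sketch only,
not part of the posted series; the helper name is made up):

	#include <linux/smp.h>

	/* Execute an isync (context synchronizing instruction) on one CPU */
	static void do_isync(void *info)
	{
		asm volatile("isync" : : : "memory");
	}

	/* Run do_isync() on every online CPU, including this one, and wait */
	static void isync_all_cpus(void)
	{
		on_each_cpu(do_isync, NULL, 1);
	}

Whether such a broadcast is actually sufficient or necessary for the
kprobes case is exactly the open question above.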



Thanks,
Naveen




Re: [PATCH 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-06-19 Thread Naveen N. Rao

Nicholas Piggin wrote:

Michael Ellerman's on June 19, 2019 3:14 pm:

Hi Naveen,

Sorry I meant to reply to this earlier .. :/


No problem. Thanks for the questions.



"Naveen N. Rao"  writes:

With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to
enable function tracing and profiling. So far, with dynamic ftrace, we
used to only patch out the branch to _mcount(). However, mflr is
executed by the branch unit that can only execute one per cycle on
POWER9 and shared with branches, so it would be nice to avoid it where
possible.

We cannot simply nop out the mflr either. When enabling function
tracing, there can be a race if tracing is enabled when some thread was
interrupted after executing a nop'ed out mflr. In this case, the thread
would execute the now-patched-in branch to _mcount() without having
executed the preceding mflr.

To solve this, we now enable function tracing in 2 steps: patch in the
mflr instruction, use synchronize_rcu_tasks() to ensure all existing
threads make progress, and then patch in the branch to _mcount(). We
override ftrace_replace_code() with a powerpc64 variant for this
purpose.


According to the ISA we're not allowed to patch mflr at runtime. See the
section on "CMODX".


According to "quasi patch class" engineering note, we can patch
anything with a preferred nop. But that's written as an optional
facility, which we don't have a feature to test for.



Hmm... I wonder what the implications are. We've been patching in a 
'trap' for kprobes for a long time now, along with having to patch back 
the original instruction (which can be anything), when the probe is 
removed.




I'm also not convinced the ordering between the two patches is
guaranteed by the ISA, given that there's possibly no isync on the other
CPU.


Will they go through a context synchronizing event?

synchronize_rcu_tasks() should ensure a thread is scheduled away, but
I'm not actually sure it guarantees CSI if it's kernel->kernel. Could
do a smp_call_function to do the isync on each CPU to be sure.


Good point. Per 
Documentation/RCU/Design/Requirements/Requirements.html#Tasks RCU:
"The solution, in the form of Tasks RCU, is to have implicit read-side 
critical sections that are delimited by voluntary context switches, that 
is, calls to schedule(), cond_resched(), and synchronize_rcu_tasks(). In 
addition, transitions to and from userspace execution also delimit 
tasks-RCU read-side critical sections."


I suppose transitions to/from userspace, as well as calls to schedule() 
result in context synchronizing instruction being executed. But, if some 
tasks call cond_resched() and synchronize_rcu_tasks(), we probably won't 
have a CSI executed.


Also:
"In CONFIG_PREEMPT=n kernels, trampolines cannot be preempted, so these 
APIs map to call_rcu(), synchronize_rcu(), and rcu_barrier(), 
respectively."


In this scenario as well, I think we won't have a CSI executed in case 
of cond_resched().


Should we enhance patch_instruction() to handle that?


- Naveen



Re: [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel

2019-06-19 Thread Naveen N. Rao

Steven Rostedt wrote:

On Tue, 18 Jun 2019 23:53:11 +0530
"Naveen N. Rao"  wrote:


Naveen N. Rao wrote:
> Steven Rostedt wrote:  
>> On Tue, 18 Jun 2019 20:17:04 +0530

>> "Naveen N. Rao"  wrote:
>>   
>>> @@ -1551,7 +1551,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end)

>>>key.flags = end;/* overload flags, as it is unsigned long */
>>>  
>>>  	for (pg = ftrace_pages_start; pg; pg = pg->next) {

>>> -  if (end < pg->records[0].ip ||
>>> +		if (end <= pg->records[0].ip ||  
>> 
>> This breaks the algorithm. "end" is inclusive. That is, if you look for

>> a single byte, where "start" and "end" are the same, and it happens to
>> be the first ip on the pg page, it will be skipped, and not found.  
> 
> Thanks. It looks like I should be over-riding ftrace_location() instead.  
> I will update this patch.  

I think I will have ftrace own the two instruction range, regardless of 
whether the preceding instruction is a 'mflr r0' or not. This simplifies 
things and I don't see an issue with it as of now. I will do more 
testing to confirm.


- Naveen


--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -951,6 +951,16 @@ void arch_ftrace_update_code(int command)
 }
 
 #ifdef CONFIG_MPROFILE_KERNEL

+/*
+ * We consider two instructions -- 'mflr r0', 'bl _mcount' -- to be part
+ * of ftrace. When checking for the first instruction, we want to include
+ * the next instruction in the range check.
+ */
+unsigned long ftrace_location(unsigned long ip)
+{
+   return ftrace_location_range(ip, ip + MCOUNT_INSN_SIZE);
+}
+
 /* Returns 1 if we patched in the mflr */
 static int __ftrace_make_call_prep(struct dyn_ftrace *rec)
 {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 21d8e201ee80..122e2bb4a739 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1573,7 +1573,7 @@ unsigned long ftrace_location_range(unsigned long start, 
unsigned long end)
  * the function tracer. It checks the ftrace internal tables to
  * determine if the address belongs or not.
  */
-unsigned long ftrace_location(unsigned long ip)
+unsigned long __weak ftrace_location(unsigned long ip)
 {
return ftrace_location_range(ip, ip);
 }


Actually, instead of making this a weak function, let's do this:


In include/ftrace.h:

#ifndef FTRACE_IP_EXTENSION
# define FTRACE_IP_EXTENSION0
#endif


In arch/powerpc/include/asm/ftrace.h

#define FTRACE_IP_EXTENSION MCOUNT_INSN_SIZE


Then we can just have:

unsigned long ftrace_location(unsigned long ip)
{
return ftrace_location_range(ip, ip + FTRACE_IP_EXTENSION);
}


Thanks, that's indeed nice. I hope you don't mind me adding your SOB for 
that.


- Naveen




Re: [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel

2019-06-18 Thread Naveen N. Rao

Naveen N. Rao wrote:

Steven Rostedt wrote:

On Tue, 18 Jun 2019 20:17:04 +0530
"Naveen N. Rao"  wrote:


@@ -1551,7 +1551,7 @@ unsigned long ftrace_location_range(unsigned long start, 
unsigned long end)
key.flags = end;/* overload flags, as it is unsigned long */
 
 	for (pg = ftrace_pages_start; pg; pg = pg->next) {

-   if (end < pg->records[0].ip ||
+   if (end <= pg->records[0].ip ||


This breaks the algorithm. "end" is inclusive. That is, if you look for
a single byte, where "start" and "end" are the same, and it happens to
be the first ip on the pg page, it will be skipped, and not found.


Thanks. It looks like I should be over-riding ftrace_location() instead.  
I will update this patch.


I think I will have ftrace own the two instruction range, regardless of 
whether the preceding instruction is a 'mflr r0' or not. This simplifies 
things and I don't see an issue with it as of now. I will do more 
testing to confirm.


- Naveen


--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -951,6 +951,16 @@ void arch_ftrace_update_code(int command)
}

#ifdef CONFIG_MPROFILE_KERNEL
+/*
+ * We consider two instructions -- 'mflr r0', 'bl _mcount' -- to be part
+ * of ftrace. When checking for the first instruction, we want to include
+ * the next instruction in the range check.
+ */
+unsigned long ftrace_location(unsigned long ip)
+{
+   return ftrace_location_range(ip, ip + MCOUNT_INSN_SIZE);
+}
+
/* Returns 1 if we patched in the mflr */
static int __ftrace_make_call_prep(struct dyn_ftrace *rec)
{
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 21d8e201ee80..122e2bb4a739 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1573,7 +1573,7 @@ unsigned long ftrace_location_range(unsigned long start, 
unsigned long end)
 * the function tracer. It checks the ftrace internal tables to
 * determine if the address belongs or not.
 */
-unsigned long ftrace_location(unsigned long ip)
+unsigned long __weak ftrace_location(unsigned long ip)
{
return ftrace_location_range(ip, ip);
}




Re: [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel

2019-06-18 Thread Naveen N. Rao

Steven Rostedt wrote:

On Tue, 18 Jun 2019 20:17:04 +0530
"Naveen N. Rao"  wrote:


@@ -1551,7 +1551,7 @@ unsigned long ftrace_location_range(unsigned long start, 
unsigned long end)
key.flags = end;/* overload flags, as it is unsigned long */
 
 	for (pg = ftrace_pages_start; pg; pg = pg->next) {

-   if (end < pg->records[0].ip ||
+   if (end <= pg->records[0].ip ||


This breaks the algorithm. "end" is inclusive. That is, if you look for
a single byte, where "start" and "end" are the same, and it happens to
be the first ip on the pg page, it will be skipped, and not found.


Thanks. It looks like I should be over-riding ftrace_location() instead.  
I will update this patch.


- Naveen




[PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel

2019-06-18 Thread Naveen N. Rao
Now that we are patching the preceding 'mflr r0' instruction with
-mprofile-kernel, we need to update ftrace_location[_range]() to
recognise that as being part of ftrace. To do this, we make a small
change to ftrace_location_range() and convert ftrace_cmp_recs() into a
weak function. We implement a custom version of ftrace_cmp_recs() which
looks at the instruction preceding the branch to _mcount() and marks
that instruction as belonging to ftrace if it is a 'nop' or 'mflr r0'.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace.c | 31 ++
 include/linux/ftrace.h |  1 +
 kernel/trace/ftrace.c  |  4 ++--
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 5e2b29808af1..b84046e43207 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -951,6 +951,37 @@ void arch_ftrace_update_code(int command)
 }
 
 #ifdef CONFIG_MPROFILE_KERNEL
+/*
+ * We need to check if the previous instruction is a 'nop' or 'mflr r0'.
+ * If so, we will patch those subsequently and that instruction must be
+ * considered as part of ftrace.
+ */
+int ftrace_cmp_recs(const void *a, const void *b)
+{
+   const struct dyn_ftrace *key = a;
+   const struct dyn_ftrace *rec = b;
+   unsigned int op;
+
+   if (key->flags < rec->ip - MCOUNT_INSN_SIZE)
+   return -1;
+   if (key->ip >= rec->ip + MCOUNT_INSN_SIZE)
+   return 1;
+
+   if (key->flags > rec->ip)
+   return 0;
+
+   /* check the previous instruction */
+   if (probe_kernel_read(, (void *)rec->ip - MCOUNT_INSN_SIZE,
+   sizeof(op)))
+   /* assume we own it */
+   return 0;
+
+   if (op != PPC_INST_NOP && op != PPC_INST_MFLR)
+   return -1;
+
+   return 0;
+}
+
 /* Returns 1 if we patched in the mflr */
 static int __ftrace_make_call_prep(struct dyn_ftrace *rec)
 {
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index fa653a561da5..9941987bf510 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -435,6 +435,7 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct 
ftrace_rec_iter *iter);
 int ftrace_update_record(struct dyn_ftrace *rec, int enable);
 int ftrace_test_record(struct dyn_ftrace *rec, int enable);
 void ftrace_run_stop_machine(int command);
+int ftrace_cmp_recs(const void *a, const void *b);
 unsigned long ftrace_location(unsigned long ip);
 unsigned long ftrace_location_range(unsigned long start, unsigned long end);
 unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 21d8e201ee80..b5c61db0b452 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1517,7 +1517,7 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, 
void *regs)
}
 
 
-static int ftrace_cmp_recs(const void *a, const void *b)
+int __weak ftrace_cmp_recs(const void *a, const void *b)
 {
const struct dyn_ftrace *key = a;
const struct dyn_ftrace *rec = b;
@@ -1551,7 +1551,7 @@ unsigned long ftrace_location_range(unsigned long start, 
unsigned long end)
key.flags = end;/* overload flags, as it is unsigned long */
 
for (pg = ftrace_pages_start; pg; pg = pg->next) {
-   if (end < pg->records[0].ip ||
+   if (end <= pg->records[0].ip ||
start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
continue;
rec = bsearch(, pg->records, pg->index,
-- 
2.22.0



[PATCH 6/7] kprobes/ftrace: Use ftrace_location() when [dis]arming probes

2019-06-18 Thread Naveen N. Rao
An ftrace location can include more than a single instruction on some
architectures (powerpc64, for now). In this case, a kprobe is permitted
on any of those instructions, and it uses the ftrace infrastructure to
function.

However, [dis]arm_kprobe_ftrace() uses the kprobe address when setting
up the ftrace filter IP. This won't work if the address points to any
instruction apart from the one that has the branch to _mcount(). To
resolve this, have [dis]arm_kprobe_ftrace() use ftrace_location() to
identify the filter IP.

Signed-off-by: Naveen N. Rao 
---
 kernel/kprobes.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 445337c107e0..282ee704e2d8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -978,10 +978,10 @@ static int prepare_kprobe(struct kprobe *p)
 /* Caller must lock kprobe_mutex */
 static int arm_kprobe_ftrace(struct kprobe *p)
 {
+   unsigned long ftrace_ip = ftrace_location((unsigned long)p->addr);
int ret = 0;
 
-   ret = ftrace_set_filter_ip(_ftrace_ops,
-  (unsigned long)p->addr, 0, 0);
+   ret = ftrace_set_filter_ip(_ftrace_ops, ftrace_ip, 0, 0);
if (ret) {
pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n",
 p->addr, ret);
@@ -1005,13 +1005,14 @@ static int arm_kprobe_ftrace(struct kprobe *p)
 * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental
 * empty filter_hash which would undesirably trace all functions.
 */
-   ftrace_set_filter_ip(_ftrace_ops, (unsigned long)p->addr, 1, 0);
+   ftrace_set_filter_ip(_ftrace_ops, ftrace_ip, 1, 0);
return ret;
 }
 
 /* Caller must lock kprobe_mutex */
 static int disarm_kprobe_ftrace(struct kprobe *p)
 {
+   unsigned long ftrace_ip = ftrace_location((unsigned long)p->addr);
int ret = 0;
 
if (kprobe_ftrace_enabled == 1) {
@@ -1022,8 +1023,7 @@ static int disarm_kprobe_ftrace(struct kprobe *p)
 
kprobe_ftrace_enabled--;
 
-   ret = ftrace_set_filter_ip(_ftrace_ops,
-  (unsigned long)p->addr, 1, 0);
+   ret = ftrace_set_filter_ip(_ftrace_ops, ftrace_ip, 1, 0);
WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n",
  p->addr, ret);
return ret;
-- 
2.22.0



[PATCH 0/7] powerpc/ftrace: Patch out -mprofile-kernel instructions

2019-06-18 Thread Naveen N. Rao
Changes since RFC:
- Patches 1 and 2: No changes
- Patch 3: rename the function to ftrace_replace_code_rec() to signify 
  that it acts on an ftrace record.
- Patch 4: numerous small changes, including those suggested by Nick 
  Piggin.
- Patch 4: additionally handle the scenario where __ftrace_make_call() 
  can be invoked without having called __ftrace_make_call_prep(), when a 
  kernel module is loaded while function tracing is active.
- Patch 5 to 7: new, to have ftrace claim ownership of two instructions, 
  and to have kprobes work properly with this.


--
On powerpc64, -mprofile-kernel results in two instructions being 
emitted: 'mflr r0' and 'bl _mcount'. So far, we were only nop'ing out 
the branch to _mcount(). This series implements an approach to also nop 
out the preceding mflr.


- Naveen


Naveen N. Rao (7):
  ftrace: Expose flags used for ftrace_replace_code()
  x86/ftrace: Fix use of flags in ftrace_replace_code()
  ftrace: Expose __ftrace_replace_code()
  powerpc/ftrace: Additionally nop out the preceding mflr with
-mprofile-kernel
  powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel
  kprobes/ftrace: Use ftrace_location() when [dis]arming probes
  powerpc/kprobes: Allow probing on any ftrace address

 arch/powerpc/kernel/kprobes-ftrace.c |  30 +++
 arch/powerpc/kernel/trace/ftrace.c   | 272 ---
 arch/x86/kernel/ftrace.c |   3 +-
 include/linux/ftrace.h   |   7 +
 kernel/kprobes.c |  10 +-
 kernel/trace/ftrace.c|  17 +-
 6 files changed, 300 insertions(+), 39 deletions(-)

-- 
2.22.0



[PATCH 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-06-18 Thread Naveen N. Rao
With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to
enable function tracing and profiling. So far, with dynamic ftrace, we
used to only patch out the branch to _mcount(). However, mflr is
executed by the branch unit that can only execute one per cycle on
POWER9 and shared with branches, so it would be nice to avoid it where
possible.

We cannot simply nop out the mflr either. When enabling function
tracing, there can be a race if tracing is enabled when some thread was
interrupted after executing a nop'ed out mflr. In this case, the thread
would execute the now-patched-in branch to _mcount() without having
executed the preceding mflr.

To solve this, we now enable function tracing in 2 steps: patch in the
mflr instruction, use synchronize_rcu_tasks() to ensure all existing
threads make progress, and then patch in the branch to _mcount(). We
override ftrace_replace_code() with a powerpc64 variant for this
purpose.

Suggested-by: Nicholas Piggin 
Reviewed-by: Nicholas Piggin 
Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace.c | 241 ++---
 1 file changed, 219 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 517662a56bdc..5e2b29808af1 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -125,7 +125,7 @@ __ftrace_make_nop(struct module *mod,
 {
unsigned long entry, ptr, tramp;
unsigned long ip = rec->ip;
-   unsigned int op, pop;
+   unsigned int op;
 
/* read where this goes */
if (probe_kernel_read(, (void *)ip, sizeof(int))) {
@@ -160,8 +160,6 @@ __ftrace_make_nop(struct module *mod,
 
 #ifdef CONFIG_MPROFILE_KERNEL
/* When using -mkernel_profile there is no load to jump over */
-   pop = PPC_INST_NOP;
-
if (probe_kernel_read(, (void *)(ip - 4), 4)) {
pr_err("Fetching instruction at %lx failed.\n", ip - 4);
return -EFAULT;
@@ -169,26 +167,23 @@ __ftrace_make_nop(struct module *mod,
 
/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) {
-   pr_err("Unexpected instruction %08x around bl _mcount\n", op);
+   pr_err("Unexpected instruction %08x before bl _mcount\n", op);
return -EINVAL;
}
-#else
-   /*
-* Our original call site looks like:
-*
-* bl 
-* ld r2,XX(r1)
-*
-* Milton Miller pointed out that we can not simply nop the branch.
-* If a task was preempted when calling a trace function, the nops
-* will remove the way to restore the TOC in r2 and the r2 TOC will
-* get corrupted.
-*
-* Use a b +8 to jump over the load.
-*/
 
-   pop = PPC_INST_BRANCH | 8;  /* b +8 */
+   /* We should patch out the bl to _mcount first */
+   if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
+   pr_err("Patching NOP failed.\n");
+   return -EPERM;
+   }
 
+   /* then, nop out the preceding 'mflr r0' as an optimization */
+   if (op == PPC_INST_MFLR &&
+   patch_instruction((unsigned int *)(ip - 4), PPC_INST_NOP)) {
+   pr_err("Patching NOP failed.\n");
+   return -EPERM;
+   }
+#else
/*
 * Check what is in the next instruction. We can see ld r2,40(r1), but
 * on first pass after boot we will see mflr r0.
@@ -202,12 +197,25 @@ __ftrace_make_nop(struct module *mod,
pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op);
return -EINVAL;
}
-#endif /* CONFIG_MPROFILE_KERNEL */
 
-   if (patch_instruction((unsigned int *)ip, pop)) {
+   /*
+* Our original call site looks like:
+*
+* bl 
+* ld r2,XX(r1)
+*
+* Milton Miller pointed out that we can not simply nop the branch.
+* If a task was preempted when calling a trace function, the nops
+* will remove the way to restore the TOC in r2 and the r2 TOC will
+* get corrupted.
+*
+* Use a b +8 to jump over the load.
+*/
+   if (patch_instruction((unsigned int *)ip, PPC_INST_BRANCH | 8)) {
pr_err("Patching NOP failed.\n");
return -EPERM;
}
+#endif /* CONFIG_MPROFILE_KERNEL */
 
return 0;
 }
@@ -421,6 +429,26 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace 
*rec, unsigned long addr)
return -EPERM;
}
 
+#ifdef CONFIG_MPROFILE_KERNEL
+   /* Nop out the preceding 'mflr r0' as an optimization */
+   if (probe_kernel_read(, (void *)(ip - 4), 4)) {
+   pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+   return -E

[PATCH 7/7] powerpc/kprobes: Allow probing on any ftrace address

2019-06-18 Thread Naveen N. Rao
With KPROBES_ON_FTRACE, kprobe is allowed to be inserted on instructions
that branch to _mcount (referred to as ftrace location). With
-mprofile-kernel, we now include the preceding 'mflr r0' as being part
of the ftrace location.

However, by default, probing on an instruction that is not actually the
branch to _mcount() is prohibited, as that is not considered to be at an
instruction boundary. This is not the case on powerpc, so allow the same
by overriding arch_check_ftrace_location().

In addition, we update kprobe_ftrace_handler() to detect this scenario
and to pass the proper nip to the pre and post probe handlers.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/kprobes-ftrace.c | 30 
 1 file changed, 30 insertions(+)

diff --git a/arch/powerpc/kernel/kprobes-ftrace.c 
b/arch/powerpc/kernel/kprobes-ftrace.c
index 972cb28174b2..6a0bd3c16cb6 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -12,14 +12,34 @@
 #include 
 #include 
 
+/*
+ * With -mprofile-kernel, we patch two instructions -- the branch to _mcount
+ * as well as the preceding 'mflr r0'. Both these instructions are claimed
+ * by ftrace and we should allow probing on either instruction.
+ */
+int arch_check_ftrace_location(struct kprobe *p)
+{
+   if (ftrace_location((unsigned long)p->addr))
+   p->flags |= KPROBE_FLAG_FTRACE;
+   return 0;
+}
+
 /* Ftrace callback handler for kprobes */
 void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
   struct ftrace_ops *ops, struct pt_regs *regs)
 {
struct kprobe *p;
+   int mflr_kprobe = 0;
struct kprobe_ctlblk *kcb;
 
p = get_kprobe((kprobe_opcode_t *)nip);
+   if (unlikely(!p)) {
+   p = get_kprobe((kprobe_opcode_t *)(nip - MCOUNT_INSN_SIZE));
+   if (!p)
+   return;
+   mflr_kprobe = 1;
+   }
+
if (unlikely(!p) || kprobe_disabled(p))
return;
 
@@ -33,6 +53,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long 
parent_nip,
 */
regs->nip -= MCOUNT_INSN_SIZE;
 
+   if (mflr_kprobe)
+   regs->nip -= MCOUNT_INSN_SIZE;
+
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (!p->pre_handler || !p->pre_handler(p, regs)) {
@@ -45,6 +68,8 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long 
parent_nip,
kcb->kprobe_status = KPROBE_HIT_SSDONE;
p->post_handler(p, regs, 0);
}
+   if (mflr_kprobe)
+   regs->nip += MCOUNT_INSN_SIZE;
}
/*
 * If pre_handler returns !0, it changes regs->nip. We have to
@@ -57,6 +82,11 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler);
 
 int arch_prepare_kprobe_ftrace(struct kprobe *p)
 {
+   if ((unsigned long)p->addr & 0x03) {
+   printk("Attempt to register kprobe at an unaligned address\n");
+   return -EILSEQ;
+   }
+
p->ainsn.insn = NULL;
p->ainsn.boostable = -1;
return 0;
-- 
2.22.0



[PATCH 3/7] ftrace: Expose __ftrace_replace_code()

2019-06-18 Thread Naveen N. Rao
While over-riding ftrace_replace_code(), we still want to reuse the
existing __ftrace_replace_code() function. Rename the function and
make it available for other kernel code.

Signed-off-by: Naveen N. Rao 
---
 include/linux/ftrace.h | 1 +
 kernel/trace/ftrace.c  | 8 
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e97789c95c4e..fa653a561da5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -456,6 +456,7 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, 
int enable);
 /* defined in arch */
 extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void);
+extern int ftrace_replace_code_rec(struct dyn_ftrace *rec, int enable);
 extern void ftrace_replace_code(int enable);
 extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5710a6b3edc1..21d8e201ee80 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2351,8 +2351,8 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
return (unsigned long)FTRACE_ADDR;
 }
 
-static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+int
+ftrace_replace_code_rec(struct dyn_ftrace *rec, int enable)
 {
unsigned long ftrace_old_addr;
unsigned long ftrace_addr;
@@ -2403,7 +2403,7 @@ void __weak ftrace_replace_code(int mod_flags)
if (rec->flags & FTRACE_FL_DISABLED)
continue;
 
-   failed = __ftrace_replace_code(rec, enable);
+   failed = ftrace_replace_code_rec(rec, enable);
if (failed) {
ftrace_bug(failed, rec);
/* Stop processing */
@@ -5827,7 +5827,7 @@ void ftrace_module_enable(struct module *mod)
rec->flags = cnt;
 
if (ftrace_start_up && cnt) {
-   int failed = __ftrace_replace_code(rec, 1);
+   int failed = ftrace_replace_code_rec(rec, 1);
if (failed) {
ftrace_bug(failed, rec);
goto out_loop;
-- 
2.22.0



[PATCH 1/7] ftrace: Expose flags used for ftrace_replace_code()

2019-06-18 Thread Naveen N. Rao
Since ftrace_replace_code() is a __weak function and can be overridden,
we need to expose the flags that can be set. So, move the flags enum to
the header file.

Reviewed-by: Steven Rostedt (VMware) 
Signed-off-by: Naveen N. Rao 
---
 include/linux/ftrace.h | 5 +
 kernel/trace/ftrace.c  | 5 -
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 25e2995d4a4c..e97789c95c4e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -162,6 +162,11 @@ enum {
FTRACE_OPS_FL_TRACE_ARRAY   = 1 << 15,
 };
 
+enum {
+   FTRACE_MODIFY_ENABLE_FL = (1 << 0),
+   FTRACE_MODIFY_MAY_SLEEP_FL  = (1 << 1),
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* The hash used to know what functions callbacks trace */
 struct ftrace_ops_hash {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 38277af44f5c..5710a6b3edc1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -75,11 +75,6 @@
 #define INIT_OPS_HASH(opsname)
 #endif
 
-enum {
-   FTRACE_MODIFY_ENABLE_FL = (1 << 0),
-   FTRACE_MODIFY_MAY_SLEEP_FL  = (1 << 1),
-};
-
 struct ftrace_ops ftrace_list_end __read_mostly = {
.func   = ftrace_stub,
.flags  = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
-- 
2.22.0



[PATCH 2/7] x86/ftrace: Fix use of flags in ftrace_replace_code()

2019-06-18 Thread Naveen N. Rao
In commit a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be
schedulable"), the generic ftrace_replace_code() function was modified to
accept a flags argument in place of a single 'enable' flag. However, the
x86 version of this function was not updated. Fix the same.

Fixes: a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be schedulable")
Signed-off-by: Naveen N. Rao 
---
 arch/x86/kernel/ftrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 0927bb158ffc..f34005a17051 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -573,8 +573,9 @@ static void run_sync(void)
local_irq_disable();
 }
 
-void ftrace_replace_code(int enable)
+void ftrace_replace_code(int mod_flags)
 {
+   int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
struct ftrace_rec_iter *iter;
struct dyn_ftrace *rec;
const char *report = "adding breakpoints";
-- 
2.22.0



Re: [PATCH] Powerpc/Watchpoint: Restore nvgprs while returning from exception

2019-06-06 Thread Naveen N. Rao

Ravi Bangoria wrote:

Powerpc hw triggers watchpoint before executing the instruction.
To make trigger-after-execute behavior, kernel emulates the
instruction. If the instruction is 'load something into non-
volatile register', exception handler should restore emulated
register state while returning back, otherwise there will be
register state corruption. Ex, Adding a watchpoint on a list
can corrupt the list:

  # cat /proc/kallsyms | grep kthread_create_list
  c121c8b8 d kthread_create_list

Add watchpoint on kthread_create_list->next:

  # perf record -e mem:0xc121c8c0

Run some workload such that new kthread gets invoked. Ex, I
just logged out from console:

  list_add corruption. next->prev should be prev (c1214e00), \
but was c121c8b8. (next=c121c8b8).
  WARNING: CPU: 59 PID: 309 at lib/list_debug.c:25 __list_add_valid+0xb4/0xc0
  CPU: 59 PID: 309 Comm: kworker/59:0 Kdump: loaded Not tainted 5.1.0-rc7+ #69
  ...
  NIP __list_add_valid+0xb4/0xc0
  LR __list_add_valid+0xb0/0xc0
  Call Trace:
  __list_add_valid+0xb0/0xc0 (unreliable)
  __kthread_create_on_node+0xe0/0x260
  kthread_create_on_node+0x34/0x50
  create_worker+0xe8/0x260
  worker_thread+0x444/0x560
  kthread+0x160/0x1a0
  ret_from_kernel_thread+0x5c/0x70

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/kernel/exceptions-64s.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)



Awesome catch - this one has had a glorious run...
Fixes: 5aae8a5370802 ("powerpc, hw_breakpoints: Implement hw_breakpoints for 64-bit 
server processors")

Reviewed-by: Naveen N. Rao 


- Naveen




Re: [RFC PATCH 2/4] x86/ftrace: Fix use of flags in ftrace_replace_code()

2019-05-20 Thread Naveen N. Rao

Hi Steven,

Steven Rostedt wrote:

On Mon, 20 May 2019 09:13:20 -0400
Steven Rostedt  wrote:


> I haven't yet tested this patch on x86, but this looked wrong so sending 
> this as a RFC.  


This code has been through a bit of updates, and I need to go through
and clean it up. I'll have to take a look and convert "int" to "bool"
so that "enable" is not confusing.

Thanks, I think I'll try to do a clean up first, and then this patch
shouldn't "look wrong" after that.



I'm going to apply the attached two patches. There may be some
conflicts between yours here and these, but nothing that Linus can't
figure out. Do you feel more comfortable with this code, if these
patches are applied?


Thanks, that definitely helps make things clearer. A very small nit from 
your first patch -- it would be good to also convert the calls to 
ftrace_check_record() to use 'true' or 'false' for the 'update' field.


I will test my series in more detail and post a v1.


- Naveen




Re: [RFC PATCH 4/4] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-05-20 Thread Naveen N. Rao

Nicholas Piggin wrote:

Naveen N. Rao's on May 18, 2019 5:02 am:

With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to
enable function tracing and profiling. So far, with dynamic ftrace, we
used to only patch out the branch to _mcount(). However, Nick Piggin
points out that "mflr is executed by the branch unit that can only
execute one per cycle on POWER9 and shared with branches, so it would be
nice to avoid it where possible."

We cannot simply nop out the mflr either. Michael Ellerman pointed out
that when enabling function tracing, there can be a race if tracing is
enabled when some thread was interrupted after executing a nop'ed out
mflr. In this case, the thread would execute the now-patched-in branch
to _mcount() without having executed the preceding mflr.

To solve this, we now enable function tracing in 2 steps: patch in the
mflr instruction, use synchronize_rcu_tasks() to ensure all existing
threads make progress, and then patch in the branch to _mcount(). We
override ftrace_replace_code() with a powerpc64 variant for this
purpose.

Signed-off-by: Nicholas Piggin 
Signed-off-by: Naveen N. Rao 


Nice! Thanks for doing a real patch. You needn't add my SOB there: my
hack was obviously garbage :) Suggested-by if anything, then for
clarity of changelog you can write the motivation directly rather than
quote me.


Thanks, I meant to call out the fact that I had added your SOB before
sending the patch, but missed doing so. Your patch was perfectly fine ;)



I don't know the ftrace subsystem well, but the powerpc instructions
and patching sequence appears to match what we agreed is the right way
to go.

As a suggestion, I would perhaps add most of information from the
second and third paragraphs of the changelog into comments
(and also explain that the lone mflr r0 is harmless).

But otherwise it looks good

Reviewed-by: Nicholas Piggin 


Thanks, I will incorporate those changes.


- Naveen




[RFC PATCH 4/4] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-05-17 Thread Naveen N. Rao
With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to
enable function tracing and profiling. So far, with dynamic ftrace, we
used to only patch out the branch to _mcount(). However, Nick Piggin
points out that "mflr is executed by the branch unit that can only
execute one per cycle on POWER9 and shared with branches, so it would be
nice to avoid it where possible."

We cannot simply nop out the mflr either. Michael Ellerman pointed out
that when enabling function tracing, there can be a race if tracing is
enabled when some thread was interrupted after executing a nop'ed out
mflr. In this case, the thread would execute the now-patched-in branch
to _mcount() without having executed the preceding mflr.

To solve this, we now enable function tracing in 2 steps: patch in the
mflr instruction, use synchronize_rcu_tasks() to ensure all existing
threads make progress, and then patch in the branch to _mcount(). We
override ftrace_replace_code() with a powerpc64 variant for this
purpose.

Signed-off-by: Nicholas Piggin 
Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/kernel/trace/ftrace.c | 188 +
 1 file changed, 166 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 517662a56bdc..5c3523c3b259 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -125,7 +125,7 @@ __ftrace_make_nop(struct module *mod,
 {
unsigned long entry, ptr, tramp;
unsigned long ip = rec->ip;
-   unsigned int op, pop;
+   unsigned int op;
 
/* read where this goes */
if (probe_kernel_read(, (void *)ip, sizeof(int))) {
@@ -160,8 +160,6 @@ __ftrace_make_nop(struct module *mod,
 
 #ifdef CONFIG_MPROFILE_KERNEL
/* When using -mkernel_profile there is no load to jump over */
-   pop = PPC_INST_NOP;
-
if (probe_kernel_read(, (void *)(ip - 4), 4)) {
pr_err("Fetching instruction at %lx failed.\n", ip - 4);
return -EFAULT;
@@ -169,26 +167,22 @@ __ftrace_make_nop(struct module *mod,
 
/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) {
-   pr_err("Unexpected instruction %08x around bl _mcount\n", op);
+   pr_err("Unexpected instruction %08x before bl _mcount\n", op);
return -EINVAL;
}
-#else
-   /*
-* Our original call site looks like:
-*
-* bl 
-* ld r2,XX(r1)
-*
-* Milton Miller pointed out that we can not simply nop the branch.
-* If a task was preempted when calling a trace function, the nops
-* will remove the way to restore the TOC in r2 and the r2 TOC will
-* get corrupted.
-*
-* Use a b +8 to jump over the load.
-*/
 
-   pop = PPC_INST_BRANCH | 8;  /* b +8 */
+   /* We should patch out the bl to _mcount first */
+   if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
+   pr_err("Patching NOP failed.\n");
+   return -EPERM;
+   }
 
+   if (op == PPC_INST_MFLR &&
+   patch_instruction((unsigned int *)(ip - 4), PPC_INST_NOP)) {
+   pr_err("Patching NOP failed.\n");
+   return -EPERM;
+   }
+#else
/*
 * Check what is in the next instruction. We can see ld r2,40(r1), but
 * on first pass after boot we will see mflr r0.
@@ -202,12 +196,25 @@ __ftrace_make_nop(struct module *mod,
pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op);
return -EINVAL;
}
-#endif /* CONFIG_MPROFILE_KERNEL */
 
-   if (patch_instruction((unsigned int *)ip, pop)) {
+   /*
+* Our original call site looks like:
+*
+* bl 
+* ld r2,XX(r1)
+*
+* Milton Miller pointed out that we can not simply nop the branch.
+* If a task was preempted when calling a trace function, the nops
+* will remove the way to restore the TOC in r2 and the r2 TOC will
+* get corrupted.
+*
+* Use a b +8 to jump over the load.
+*/
+   if (patch_instruction((unsigned int *)ip, PPC_INST_BRANCH | 8)) {
pr_err("Patching NOP failed.\n");
return -EPERM;
}
+#endif /* CONFIG_MPROFILE_KERNEL */
 
return 0;
 }
@@ -421,6 +428,25 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace 
*rec, unsigned long addr)
return -EPERM;
}
 
+#ifdef CONFIG_MPROFILE_KERNEL
+   if (probe_kernel_read(, (void *)(ip - 4), 4)) {
+   pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+   return -EFAULT;
+   }
+
+   /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
+   if (

[RFC PATCH 1/4] ftrace: Expose flags used for ftrace_replace_code()

2019-05-17 Thread Naveen N. Rao
Since ftrace_replace_code() is a __weak function and can be overridden,
we need to expose the flags that can be set. So, move the flags enum to
the header file.

Signed-off-by: Naveen N. Rao 
---
 include/linux/ftrace.h | 5 +
 kernel/trace/ftrace.c  | 5 -
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 20899919ead8..835e761f63b0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -162,6 +162,11 @@ enum {
FTRACE_OPS_FL_TRACE_ARRAY   = 1 << 15,
 };
 
+enum {
+   FTRACE_MODIFY_ENABLE_FL = (1 << 0),
+   FTRACE_MODIFY_MAY_SLEEP_FL  = (1 << 1),
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* The hash used to know what functions callbacks trace */
 struct ftrace_ops_hash {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b920358dd8f7..38c15cd27fc4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -78,11 +78,6 @@
 #define ASSIGN_OPS_HASH(opsname, val)
 #endif
 
-enum {
-   FTRACE_MODIFY_ENABLE_FL = (1 << 0),
-   FTRACE_MODIFY_MAY_SLEEP_FL  = (1 << 1),
-};
-
 struct ftrace_ops ftrace_list_end __read_mostly = {
.func   = ftrace_stub,
.flags  = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
-- 
2.21.0


