Re: [PATCH v6] arch/powerpc/kvm: Add support for reading VPA counters for pseries guests

2024-05-06 Thread Naveen N Rao
On Mon, May 06, 2024 at 08:26:03PM GMT, Gautam Menghani wrote:
> PAPR hypervisor has introduced three new counters in the VPA area of
> LPAR CPUs for KVM L2 guest (see [1] for terminology) observability - 2
> for context switches from host to guest and vice versa, and 1 counter
> for getting the total time spent inside the KVM guest. Add a tracepoint
> that enables reading the counters for use by ftrace/perf. Note that this
> tracepoint is only available for the nestedv2 API (i.e., KVM on PowerVM).
> 
> [1] Terminology:
> a. L1 refers to the VM (LPAR) booted on top of PAPR hypervisor
> b. L2 refers to the KVM guest booted on top of L1.
> 
> Signed-off-by: Vaibhav Jain 
> Signed-off-by: Gautam Menghani 
> ---
> v5 -> v6:
> 1. Use TRACE_EVENT_FN to enable/disable counters only once.
> 2. Remove the agg. counters from vcpu->arch.
> 3. Use PACA to maintain old counter values instead of zeroing on every
> entry.
> 4. Simplify variable names
> 
> v4 -> v5:
> 1. Define helper functions for getting/setting the accumulation counter
> in L2's VPA
> 
> v3 -> v4:
> 1. After vcpu_run, check the VPA flag instead of checking for tracepoint
> being enabled for disabling the cs time accumulation.
> 
> v2 -> v3:
> 1. Move the counter disabling and zeroing code to a different function.
> 2. Move the get_lppaca() inside the tracepoint_enabled() branch.
> 3. Add the aggregation logic to maintain total context switch time.
> 
> v1 -> v2:
> 1. Fix the build error due to invalid struct member reference.
> 
>  arch/powerpc/include/asm/lppaca.h | 11 +--
>  arch/powerpc/include/asm/paca.h   |  5 +++
>  arch/powerpc/kvm/book3s_hv.c  | 52 +++
>  arch/powerpc/kvm/trace_hv.h   | 27 
>  4 files changed, 92 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/lppaca.h 
> b/arch/powerpc/include/asm/lppaca.h
> index 61ec2447dabf..f40a646bee3c 100644
> --- a/arch/powerpc/include/asm/lppaca.h
> +++ b/arch/powerpc/include/asm/lppaca.h
> @@ -62,7 +62,8 @@ struct lppaca {
>   u8  donate_dedicated_cpu;   /* Donate dedicated CPU cycles */
>   u8  fpregs_in_use;
>   u8  pmcregs_in_use;
> - u8  reserved8[28];
> + u8  l2_counters_enable;  /* Enable usage of counters for KVM guest 
> */
> + u8  reserved8[27];
>   __be64  wait_state_cycles;  /* Wait cycles for this proc */
>   u8  reserved9[28];
>   __be16  slb_count;  /* # of SLBs to maintain */
> @@ -92,9 +93,13 @@ struct lppaca {
>   /* cacheline 4-5 */
>  
>   __be32  page_ins;   /* CMO Hint - # page ins by OS */
> - u8  reserved12[148];
> + u8  reserved12[28];
> + volatile __be64 l1_to_l2_cs_tb;
> + volatile __be64 l2_to_l1_cs_tb;
> + volatile __be64 l2_runtime_tb;
> + u8 reserved13[96];
>   volatile __be64 dtl_idx;/* Dispatch Trace Log head index */
> - u8  reserved13[96];
> + u8  reserved14[96];
>  } cacheline_aligned;
>  
>  #define lppaca_of(cpu)   (*paca_ptrs[cpu]->lppaca_ptr)
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index 1d58da946739..f20ac7a6efa4 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -278,6 +278,11 @@ struct paca_struct {
>   struct mce_info *mce_info;
>   u8 mce_pending_irq_work;
>  #endif /* CONFIG_PPC_BOOK3S_64 */
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> + u64 l1_to_l2_cs;
> + u64 l2_to_l1_cs;
> + u64 l2_runtime_agg;
> +#endif
>  } cacheline_aligned;
>  
>  extern void copy_mm_to_paca(struct mm_struct *mm);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 8e86eb577eb8..ed69ad58bd02 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -4108,6 +4108,54 @@ static void vcpu_vpa_increment_dispatch(struct 
> kvm_vcpu *vcpu)
>   }
>  }
>  
> +static inline int kvmhv_get_l2_counters_status(void)
> +{
> + return get_lppaca()->l2_counters_enable;
> +}
> +
> +static inline void kvmhv_set_l2_counters_status(int cpu, bool status)
> +{
> + if (status)
> + lppaca_of(cpu).l2_counters_enable = 1;
> + else
> + lppaca_of(cpu).l2_counters_enable = 0;
> +}
> +
> +int kmvhv_counters_tracepoint_regfunc(void)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu) {
> + kvmhv_set_l2_counters_status(cpu, true);
> + }
> + return 0;
> +}
> +
> +void kmvhv_counters_tracepoint_unregfunc(void)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu) {
> + kvmhv_set_l2_counters_status(cpu, false);
> + }
> +}
> +
> +static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
> +{
> + struct lppaca *lp = get_lppaca();
> + u64 l1_to_l2_ns, l2_to_l1_ns, l2_runtime_ns;
> +
> + l1_to_l2_ns = tb_to_ns(be64_to_cpu(lp->l1_to_l2_cs_tb));
> + l2_to_l1_ns = tb_to_ns(be64_to_cpu(lp->l2_to_l1_cs_tb));
> + l2_r

[powerpc:next] BUILD SUCCESS be140f1732b523947425aaafbe2e37b41b622d96

2024-05-06 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
next
branch HEAD: be140f1732b523947425aaafbe2e37b41b622d96  powerpc/64: Set _IO_BASE 
to POISON_POINTER_DELTA not 0 for CONFIG_PCI=n

elapsed time: 923m

configs tested: 181
configs skipped: 4

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alpha allnoconfig   gcc  
alphaallyesconfig   gcc  
alpha   defconfig   gcc  
arc  allmodconfig   gcc  
arc   allnoconfig   gcc  
arc  allyesconfig   gcc  
arc defconfig   gcc  
arc   randconfig-001-20240507   gcc  
arc   randconfig-002-20240507   gcc  
arm  allmodconfig   gcc  
arm   allnoconfig   clang
arm  allyesconfig   gcc  
arm defconfig   clang
arm   randconfig-001-20240507   gcc  
arm   randconfig-002-20240507   clang
arm   randconfig-003-20240507   gcc  
arm   randconfig-004-20240507   clang
arm   spitz_defconfig   gcc  
arm64allmodconfig   clang
arm64 allnoconfig   gcc  
arm64   defconfig   gcc  
arm64 randconfig-001-20240507   clang
arm64 randconfig-002-20240507   clang
arm64 randconfig-003-20240507   clang
arm64 randconfig-004-20240507   clang
csky allmodconfig   gcc  
csky  allnoconfig   gcc  
csky allyesconfig   gcc  
cskydefconfig   gcc  
csky  randconfig-001-20240507   gcc  
csky  randconfig-002-20240507   gcc  
hexagon  allmodconfig   clang
hexagon   allnoconfig   clang
hexagon  allyesconfig   clang
hexagon defconfig   clang
hexagon   randconfig-001-20240507   clang
hexagon   randconfig-002-20240507   clang
i386 allmodconfig   gcc  
i386  allnoconfig   gcc  
i386 allyesconfig   gcc  
i386 buildonly-randconfig-001-20240507   clang
i386 buildonly-randconfig-002-20240507   clang
i386 buildonly-randconfig-003-20240507   clang
i386 buildonly-randconfig-004-20240507   gcc  
i386 buildonly-randconfig-005-20240507   gcc  
i386 buildonly-randconfig-006-20240507   clang
i386defconfig   clang
i386  randconfig-001-20240507   clang
i386  randconfig-002-20240507   gcc  
i386  randconfig-003-20240507   clang
i386  randconfig-004-20240507   clang
i386  randconfig-005-20240507   clang
i386  randconfig-006-20240507   clang
i386  randconfig-011-20240507   gcc  
i386  randconfig-012-20240507   clang
i386  randconfig-013-20240507   clang
i386  randconfig-014-20240507   gcc  
i386  randconfig-015-20240507   gcc  
i386  randconfig-016-20240507   clang
loongarchallmodconfig   gcc  
loongarch allnoconfig   gcc  
loongarchallyesconfig   gcc  
loongarch   defconfig   gcc  
loongarch randconfig-001-20240507   gcc  
loongarch randconfig-002-20240507   gcc  
m68k allmodconfig   gcc  
m68k  allnoconfig   gcc  
m68k allyesconfig   gcc  
m68kdefconfig   gcc  
m68k   virt_defconfig   gcc  
microblaze   allmodconfig   gcc  
microblazeallnoconfig   gcc  
microblaze   allyesconfig   gcc  
microblaze  defconfig   gcc  
mips allmodconfig   gcc  
mips  allnoconfig   gcc  
mips allyesconfig   gcc  
mips  malta_kvm_defconfig   gcc  
nios2allmodconfig   gcc  
nios2 allnoconfig   gcc  
nios2allyesconfig   gcc  
nios2   defconfig   gcc  
nios2 randconfig-001-20240507   gcc  
nios2 randconfig-002-20240507   gcc  
openrisc alldefconfig   gcc  
openrisc 

Re: [PATCH V2 4/9] tools/perf: Add support to capture and parse raw instruction in objdump

2024-05-06 Thread Namhyung Kim
On Mon, May 6, 2024 at 5:21 AM Athira Rajeev
 wrote:
>
> Add support to capture and parse raw instruction in objdump.
> Currently, the perf tool infrastructure uses "--no-show-raw-insn" option
> with "objdump" while disassemble. Example from powerpc with this option
> for an instruction address is:
>
> Snippet from:
> objdump  --start-address= --stop-address=  -d 
> --no-show-raw-insn -C 
>
> c10224b4:   lwz r10,0(r9)
>
> This line "lwz r10,0(r9)" is parsed to extract instruction name,
> registers names and offset. Also to find whether there is a memory
> reference in the operands, "memory_ref_char" field of objdump is used.
> For x86, "(" is used as memory_ref_char to tackle instructions of the
> form "mov  (%rax), %rcx".
>
> In case of powerpc, instructions using "(" are not the only memory
> instructions. For example, the above instruction can also be of extended
> form (X form) "lwzx r10,0,r19". In order to easily identify the instruction
> category and extract the source/target registers, the patch adds support to
> use the raw instruction. With the raw instruction, macros are added to
> extract the opcode and register fields.
>
> "struct ins_operands" and "struct ins" is updated to carry opcode and
> raw instruction binary code (raw_insn). Function "disasm_line__parse"
> is updated to fill the raw instruction hex value and opcode in newly
> added fields. There is no changes in existing code paths, which parses
> the disassembled code. The architecture using the instruction name and
> present approach is not altered. Since this approach targets powerpc,
> the macro implementation is added for powerpc as of now.
>
> Example:
> representation using --show-raw-insn in objdump gives result:
>
> 38 01 81 e8 ld  r4,312(r1)
>
> Here "38 01 81 e8" is the raw instruction representation. In powerpc,
> this translates to instruction form: "ld RT,DS(RA)" and binary code
> as:
> _____________________________________
> | 58 |  RT  |  RA |      DS       | |
> -------------------------------------
> 0    6      11    16             30 31
>
> Function "disasm_line__parse" is updated to capture:
>
> line:38 01 81 e8 ld  r4,312(r1)
> opcode and raw instruction "38 01 81 e8"
> Raw instruction is used later to extract the reg/offset fields.
>
> Signed-off-by: Athira Rajeev 
> ---
>  tools/include/linux/string.h  |  2 +
>  tools/lib/string.c| 13 +++
>  tools/perf/arch/powerpc/util/dwarf-regs.c | 19 ++
>  tools/perf/util/disasm.c  | 46 +++
>  tools/perf/util/disasm.h  |  6 +++
>  tools/perf/util/include/dwarf-regs.h  |  9 +
>  6 files changed, 88 insertions(+), 7 deletions(-)
>
> diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
> index db5c99318c79..0acb1fc14e19 100644
> --- a/tools/include/linux/string.h
> +++ b/tools/include/linux/string.h
> @@ -46,5 +46,7 @@ extern char * __must_check skip_spaces(const char *);
>
>  extern char *strim(char *);
>
> +extern void remove_spaces(char *s);
> +
>  extern void *memchr_inv(const void *start, int c, size_t bytes);
>  #endif /* _TOOLS_LINUX_STRING_H_ */
> diff --git a/tools/lib/string.c b/tools/lib/string.c
> index 8b6892f959ab..21d273e69951 100644
> --- a/tools/lib/string.c
> +++ b/tools/lib/string.c
> @@ -153,6 +153,19 @@ char *strim(char *s)
> return skip_spaces(s);
>  }
>
> +/*
> + * remove_spaces - Removes whitespaces from @s
> + */
> +void remove_spaces(char *s)
> +{
> +   char *d = s;
> +   do {
> +   while (*d == ' ') {
> +   ++d;
> +   }
> +   } while ((*s++ = *d++));
> +}
> +
>  /**
>   * strreplace - Replace all occurrences of character in string.
>   * @s: The string to operate on.
> diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c 
> b/tools/perf/arch/powerpc/util/dwarf-regs.c
> index 0c4f4caf53ac..e60a71fd846e 100644
> --- a/tools/perf/arch/powerpc/util/dwarf-regs.c
> +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
> @@ -98,3 +98,22 @@ int regs_query_register_offset(const char *name)
> return roff->ptregs_offset;
> return -EINVAL;
>  }
> +
> +#definePPC_OP(op)  (((op) >> 26) & 0x3F)
> +#define PPC_RA(a)  (((a) >> 16) & 0x1f)
> +#define PPC_RT(t)  (((t) >> 21) & 0x1f)
> +
> +int get_opcode_insn(unsigned int raw_insn)
> +{
> +   return PPC_OP(raw_insn);
> +}
> +
> +int get_source_reg(unsigned int raw_insn)
> +{
> +   return PPC_RA(raw_insn);
> +}
> +
> +int get_target_reg(unsigned int raw_insn)
> +{
> +   return PPC_RT(raw_insn);
> +}
> diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
> index 2de66a092cab..85692f73e78f 100644
> --- a/tools/perf/util/disasm.c
> +++ b/tools/perf/util/disasm.c
> @@ -43,7 +43,7 @@ static int call__scnprintf(struct ins *ins, char *bf, 
> size_t size,
>struct ins_operands *ops, int max_ins_name);
>
>  static void ins__sort(st

Re: [PATCH V2 3/9] tools/perf: Fix a comment about multi_regs in extract_reg_offset function

2024-05-06 Thread Namhyung Kim
On Mon, May 6, 2024 at 5:19 AM Athira Rajeev
 wrote:
>
> Fix a comment in function which explains how multi_regs field gets set
> for an instruction. In the example, "mov  %rsi, 8(%rbx,%rcx,4)", the
> comment mistakenly referred to "dst_multi_regs = 0". Correct it to use
> "src_multi_regs = 0"
>
> Signed-off-by: Athira Rajeev 

Acked-by: Namhyung Kim 

Thanks,
Namhyung

> ---
>  tools/perf/util/annotate.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index f5b6b5e5e757..0f5e10654d09 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -2093,7 +2093,7 @@ static int extract_reg_offset(struct arch *arch, const 
> char *str,
>   *   mov  0x18, %r8  # src_reg1 = -1, src_mem = 0
>   *   # dst_reg1 = r8, dst_mem = 0
>   *
> - *   mov  %rsi, 8(%rbx,%rcx,4)  # src_reg1 = rsi, src_mem = 0, 
> dst_multi_regs = 0
> + *   mov  %rsi, 8(%rbx,%rcx,4)  # src_reg1 = rsi, src_mem = 0, 
> src_multi_regs = 0
>   *  # dst_reg1 = rbx, dst_reg2 = rcx, dst_mem = 1
>   *  # dst_multi_regs = 1, dst_offset = 8
>   */
> --
> 2.43.0
>


Re: [PATCH V2 0/9] Add data type profiling support for powerpc

2024-05-06 Thread Namhyung Kim
Hello,

On Mon, May 6, 2024 at 5:19 AM Athira Rajeev
 wrote:
>
> The patchset from Namhyung added support for data type profiling
> in the perf tool. This enabled support to associate PMU samples with the
> data types they refer to, using DWARF debug information. With upstream
> perf, it is currently possible to run perf report or perf annotate to
> view the data type information on x86.
>
> The initial patchset posted here had the changes needed to enable data type
> profiling support for powerpc.
>
> https://lore.kernel.org/all/6e09dc28-4a2e-49d8-a2b5-ffb3396a9...@csgroup.eu/T/
>
> The main changes were:
> 1. A powerpc instruction mnemonic table to associate load/store
> instructions with move_ops, which is used to identify whether an
> instruction is a memory access one.
> 2. To get the register number and access offset from the given
> instruction, the code uses fields from "struct arch" -> objdump.
> Added an entry for powerpc here.
> 3. A get_arch_regnum to return the register number from the
> register name string.
>
> But the approach used in the initial patchset relied on parsing the
> disassembled code, which is what the current perf tool implementation does.
>
> Example: lwz r10,0(r9)
>
> This line "lwz r10,0(r9)" is parsed to extract instruction name,
> registers names and offset. Also to find whether there is a memory
> reference in the operands, "memory_ref_char" field of objdump is used.
> For x86, "(" is used as memory_ref_char to tackle instructions of the
> form "mov  (%rax), %rcx".
>
> In case of powerpc, instructions using "(" are not the only memory
> instructions. For example, the above instruction can also be of extended
> form (X form) "lwzx r10,0,r19". In order to easily identify the instruction
> category and extract the source/target registers, this patchset adds support
> to use the raw instruction. With the raw instruction, macros are added to
> extract the opcode and register fields.
>
> Example representation using --show-raw-insn in objdump gives result:
>
> 38 01 81 e8 ld  r4,312(r1)
>
> Here "38 01 81 e8" is the raw instruction representation. In powerpc,
> this translates to instruction form: "ld RT,DS(RA)" and binary code
> as:
> _____________________________________
> | 58 |  RT  |  RA |      DS       | |
> -------------------------------------
> 0    6      11    16             30 31
>
> Patchset adds support to pick the opcode and reg fields from this
> raw/binary instruction code. This approach came in from review comment
> by Segher Boessenkool for the initial patchset.
>
> Apart from that, instruction tracking is enabled for powerpc and
> support function is added to find variables defined as registers
> Example, in powerpc, two registers are
> defined to represent variable:
> 1. r13: represents local_paca
> register struct paca_struct *local_paca asm("r13");
>
> 2. r1: represents stack_pointer
> register void *__stack_pointer asm("r1");
>
> These are handled in this patchset.
>
> - Patch 1 rearranges the register state type structures into a header file
> so that they can be referred to from other arch specific files
> - Patch 2 makes instruction tracking a callback in "struct arch"
> so that it can be implemented by other archs easily and defined in arch
> specific files
> - Patch 3 fixes a small comment
> - Patch 4 adds support to capture and parse the raw instruction in objdump
> while keeping the existing approach intact.
> - Patch 5 updates parameters for the reg extract functions to use the raw
> instruction on powerpc
> - Patch 6 and Patch 7 handle instruction tracking for powerpc.
> - Patch 8 and Patch 9 handle support to find global register variables
>
> With the current patchset:
>
>  ./perf record -a -e mem-loads sleep 1
>  ./perf report -s type,typeoff --hierarchy --group --stdio
>  ./perf annotate --data-type --insn-stat
>
> perf annotate logs:
>
> Annotate Instruction stats
> total 562, ok 441 (78.5%), bad 121 (21.5%)
>
>   Name  :  Good   Bad
> ---------------------
>   ld    :   313    54
>   lwz   :    51    32
>   lbz   :    31     5
>   ldx   :     6    21
>   lhz   :    23     0
>   lwa   :     4     3
>   lwarx :     5     0
>   lwzx  :     2     2
>   ldarx :     3     0
>   lwzu  :     2     0
>   stdcx.:     0     1
>   nop   :     0     1
>   ldu   :     1     0
>   lbzx  :     0     1
>   lwax  :     0     1
>
> perf report logs:
>
> # Samples: 1K of event 'mem-loads'
> # Event count (approx.): 937238
> #
> # Overhead  Data Type  Data Type Offset
> #   .  
> #
> 48.81%  (unknown)  (unknown) +0 (no field)
> 12.85%  long unsigned int  long unsigned int +0 (current_stack_pointer)
>  4.68%  struct paca_struct  struct paca_struct +2312 (__current)
>  4.57%  struct paca_struct  struct paca_struct +2354 (irq_soft_mask)
>  2.68%  struct paca_struct  struct paca_struct +8 (paca_index)
>  2.64%  struct paca_struct  struct paca_struct +2808 (canary)
>  2.24%  struct paca_struct  stru

Re: [kvm-unit-tests PATCH v9 03/31] powerpc: Mark known failing tests as kfail

2024-05-06 Thread Nicholas Piggin
On Mon May 6, 2024 at 5:37 PM AEST, Thomas Huth wrote:
> On 04/05/2024 14.28, Nicholas Piggin wrote:
> > Mark the failing h_cede_tm and spapr_vpa tests as kfail.
> > 
> > Signed-off-by: Nicholas Piggin 
> > ---
> >   powerpc/spapr_vpa.c | 3 ++-
> >   powerpc/tm.c| 3 ++-
> >   2 files changed, 4 insertions(+), 2 deletions(-)
> > 
> > diff --git a/powerpc/spapr_vpa.c b/powerpc/spapr_vpa.c
> > index c2075e157..46fa0485c 100644
> > --- a/powerpc/spapr_vpa.c
> > +++ b/powerpc/spapr_vpa.c
> > @@ -150,7 +150,8 @@ static void test_vpa(void)
> > report_fail("Could not deregister after registration");
> >   
> > disp_count1 = be32_to_cpu(vpa->vp_dispatch_count);
> > -   report(disp_count1 % 2 == 1, "Dispatch count is odd after deregister");
> > +   /* TCG known fail, could be wrong test, must verify against PowerVM */
> > +   report_kfail(true, disp_count1 % 2 == 1, "Dispatch count is odd after 
> > deregister");
>
> Using "true" as first argument looks rather pointless - then you could also 
> simply delete the test completely if it can never be tested reliably.
>
> Thus could you please introduce a helper function is_tcg() that could be 
> used to check whether we run under TCG (and not KVM)? I think you could 
> check for "linux,kvm" in the "compatible" property in /hypervisor in the 
> device tree to see whether we're running in KVM mode or in TCG mode.

This I added in patch 30.

The reason for the suboptimal patch ordering was just me being lazy and
avoiding rebasing annoyance. I'd written a bunch of failing test cases
for QEMU work, but hadn't done the kvm/tcg test yet. It had a few
conflicts so I put it at the end... can rebase if you'd really prefer.

>
> > report_prefix_pop();
> >   }
> > diff --git a/powerpc/tm.c b/powerpc/tm.c
> > index 6b1ceeb6e..d9e7f455d 100644
> > --- a/powerpc/tm.c
> > +++ b/powerpc/tm.c
> > @@ -133,7 +133,8 @@ int main(int argc, char **argv)
> > report_skip("TM is not available");
> > goto done;
> > }
> > -   report(cpus_with_tm == nr_cpus,
> > +   /* KVM does not report TM in secondary threads in POWER9 */
> > +   report_kfail(true, cpus_with_tm == nr_cpus,
> >"TM available in all 'ibm,pa-features' properties");
>
> Could you check the PVR for POWER9 here instead of using "true" as first 
> parameter?

Also covered in patch 30.

Thanks,
Nick


[powerpc:merge] BUILD SUCCESS bbd9f1ec54a72b7a68ed18ad2281f453ee06d606

2024-05-06 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
merge
branch HEAD: bbd9f1ec54a72b7a68ed18ad2281f453ee06d606  powerpc/ci: Don't upload 
allconfig artifacts

elapsed time: 990m

configs tested: 209
configs skipped: 4

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alpha allnoconfig   gcc  
alphaallyesconfig   gcc  
alpha   defconfig   gcc  
arc  allmodconfig   gcc  
arc   allnoconfig   gcc  
arc  allyesconfig   gcc  
arc defconfig   gcc  
arc   randconfig-001-20240507   gcc  
arc   randconfig-002-20240507   gcc  
arm  allmodconfig   gcc  
arm   allnoconfig   clang
arm  allyesconfig   gcc  
arm defconfig   clang
arm   randconfig-001-20240507   gcc  
arm   randconfig-002-20240507   clang
arm   randconfig-003-20240507   gcc  
arm   randconfig-004-20240507   clang
arm   spitz_defconfig   gcc  
arm64allmodconfig   clang
arm64 allnoconfig   gcc  
arm64   defconfig   gcc  
arm64 randconfig-001-20240507   clang
arm64 randconfig-002-20240507   clang
arm64 randconfig-003-20240507   clang
arm64 randconfig-004-20240507   clang
csky allmodconfig   gcc  
csky  allnoconfig   gcc  
csky allyesconfig   gcc  
cskydefconfig   gcc  
csky  randconfig-001-20240507   gcc  
csky  randconfig-002-20240507   gcc  
hexagon  allmodconfig   clang
hexagon   allnoconfig   clang
hexagon  allyesconfig   clang
hexagon defconfig   clang
hexagon   randconfig-001-20240507   clang
hexagon   randconfig-002-20240507   clang
i386 allmodconfig   gcc  
i386  allnoconfig   gcc  
i386 allyesconfig   gcc  
i386 buildonly-randconfig-001-20240506   gcc  
i386 buildonly-randconfig-001-20240507   clang
i386 buildonly-randconfig-002-20240506   clang
i386 buildonly-randconfig-002-20240507   clang
i386 buildonly-randconfig-003-20240506   gcc  
i386 buildonly-randconfig-003-20240507   clang
i386 buildonly-randconfig-004-20240506   gcc  
i386 buildonly-randconfig-005-20240506   gcc  
i386 buildonly-randconfig-006-20240506   clang
i386 buildonly-randconfig-006-20240507   clang
i386defconfig   clang
i386  randconfig-001-20240506   gcc  
i386  randconfig-001-20240507   clang
i386  randconfig-002-20240506   clang
i386  randconfig-003-20240506   gcc  
i386  randconfig-003-20240507   clang
i386  randconfig-004-20240506   clang
i386  randconfig-004-20240507   clang
i386  randconfig-005-20240506   clang
i386  randconfig-005-20240507   clang
i386  randconfig-006-20240506   gcc  
i386  randconfig-006-20240507   clang
i386  randconfig-011-20240506   gcc  
i386  randconfig-012-20240506   gcc  
i386  randconfig-012-20240507   clang
i386  randconfig-013-20240506   gcc  
i386  randconfig-013-20240507   clang
i386  randconfig-014-20240506   clang
i386  randconfig-015-20240506   clang
i386  randconfig-016-20240506   clang
i386  randconfig-016-20240507   clang
loongarchallmodconfig   gcc  
loongarch allnoconfig   gcc  
loongarchallyesconfig   gcc  
loongarch   defconfig   gcc  
loongarch randconfig-001-20240507   gcc  
loongarch randconfig-002-20240507   gcc  
m68k allmodconfig   gcc  
m68k  allnoconfig   gcc  
m68k allyesconfig   gcc  
m68kdefconfig   gcc  
m68k   virt_defconfig   gcc  
microblaze   allmodconfig   gcc  
microblazeallnoconfig   gcc  
microblaze   allyesconfig   gcc  
micro

Re: [kvm-unit-tests PATCH v9 01/31] doc: update unittests doc

2024-05-06 Thread Nicholas Piggin
On Mon May 6, 2024 at 5:03 PM AEST, Thomas Huth wrote:
> On 04/05/2024 14.28, Nicholas Piggin wrote:
> > This adds a few minor fixes.
> > 
> > Signed-off-by: Nicholas Piggin 
> > ---
> >   docs/unittests.txt | 12 +++-
> >   1 file changed, 7 insertions(+), 5 deletions(-)
> > 
> > diff --git a/docs/unittests.txt b/docs/unittests.txt
> > index 3192a60ec..7cf2c55ad 100644
> > --- a/docs/unittests.txt
> > +++ b/docs/unittests.txt
> > @@ -15,8 +15,8 @@ unittests.cfg format
> >   
> >   # is the comment symbol, all following contents of the line is ignored.
> >   
> > -Each unit test is defined with a [unit-test-name] line, followed by
> > -a set of parameters that control how the test case is run. The name is
> > +Each unit test is defined with a [unit-test-name] line, followed by a
> > +set of parameters that control how the test case is run. The name is
> >   arbitrary and appears in the status reporting output.
> >   
> >   Parameters appear on their own lines under the test name, and have a
> > @@ -62,8 +62,8 @@ groups
> >   groups =   ...
> >   
> >   Used to group the test cases for the `run_tests.sh -g ...` run group
> > -option. Adding a test to the nodefault group will cause it to not be
> > -run by default.
> > +option. The group name is arbitrary, aside from the nodefault group
> > +which makes the test to not be run by default.
> >   
> >   accel
> >   -
> > @@ -82,8 +82,10 @@ Optional timeout in seconds, after which the test will 
> > be killed and fail.
> >   
> >   check
> >   -
> > -check = =<
> > +check = <path>=<value>
> >   
> >   Check a file for a particular value before running a test. The check line
> >   can contain multiple files to check separated by a space, but each check
> >   parameter needs to be of the form <path>=<value>
> > +
> > +The path and value can not contain space, =, or shell wildcard characters.
>
> Could you comment on my feedback here, please:
>
>   https://lore.kernel.org/kvm/951ccd88-0e39-4379-8d86-718e72594...@redhat.com/

Sorry, missed that. I didn't mean to re-send this one.

Thanks,
Nick


Re: linux-next: manual merge of the risc-v tree with the powerpc tree

2024-05-06 Thread Michael Ellerman
Stephen Rothwell  writes:
> Hi all,
>
> Today's linux-next merge of the risc-v tree got conflicts in:
>
>   include/uapi/linux/prctl.h
>   kernel/sys.c
>
> between commit:
>
>   628d701f2de5 ("powerpc/dexcr: Add DEXCR prctl interface")
>
> from the powerpc tree and commit:
>
>   6b9391b581fd ("riscv: Include riscv_set_icache_flush_ctx prctl")
>
> from the risc-v tree.
>
> I fixed it up (see below) and can carry the fix as necessary. This
> is now fixed as far as linux-next is concerned, but any non trivial
> conflicts should be mentioned to your upstream maintainer when your tree
> is submitted for merging.  You may also want to consider cooperating
> with the maintainer of the conflicting tree to minimise any particularly
> complex conflicts.

Thanks.

As you would have seen, I accounted for 71 being taken by
PR_RISCV_SET_ICACHE_FLUSH_CTX in my tree, so this is just a textual
conflict.

So should be nothing to do other than mention it to Linus.

cheers

> diff --cc include/uapi/linux/prctl.h
> index 713d28788df7,524d546d697b..
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@@ -306,20 -306,10 +306,26 @@@ struct prctl_mm_map 
>   # define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK   0xc
>   # define PR_RISCV_V_VSTATE_CTRL_MASK0x1f
>   
> + #define PR_RISCV_SET_ICACHE_FLUSH_CTX   71
> + # define PR_RISCV_CTX_SW_FENCEI_ON  0
> + # define PR_RISCV_CTX_SW_FENCEI_OFF 1
> + # define PR_RISCV_SCOPE_PER_PROCESS 0
> + # define PR_RISCV_SCOPE_PER_THREAD  1
> + 
>  +/* PowerPC Dynamic Execution Control Register (DEXCR) controls */
>  +#define PR_PPC_GET_DEXCR72
>  +#define PR_PPC_SET_DEXCR73
>  +/* DEXCR aspect to act on */
>  +# define PR_PPC_DEXCR_SBHE  0 /* Speculative branch hint enable */
>  +# define PR_PPC_DEXCR_IBRTPD1 /* Indirect branch recurrent 
> target prediction disable */
>  +# define PR_PPC_DEXCR_SRAPD 2 /* Subroutine return address 
> prediction disable */
>  +# define PR_PPC_DEXCR_NPHIE 3 /* Non-privileged hash instruction 
> enable */
>  +/* Action to apply / return */
>  +# define PR_PPC_DEXCR_CTRL_EDITABLE  0x1 /* Aspect can be modified with 
> PR_PPC_SET_DEXCR */
>  +# define PR_PPC_DEXCR_CTRL_SET   0x2 /* Set the aspect for this 
> process */
>  +# define PR_PPC_DEXCR_CTRL_CLEAR 0x4 /* Clear the aspect for this 
> process */
>  +# define PR_PPC_DEXCR_CTRL_SET_ONEXEC0x8 /* Set the aspect on exec 
> */
>  +# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on 
> exec */
>  +# define PR_PPC_DEXCR_CTRL_MASK 0x1f
>  +
>   #endif /* _LINUX_PRCTL_H */
> diff --cc kernel/sys.c
> index f9c95410278c,1b7bda0722ca..
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@@ -146,12 -146,9 +146,15 @@@
>   #ifndef RISCV_V_GET_CONTROL
>   # define RISCV_V_GET_CONTROL()  (-EINVAL)
>   #endif
> + #ifndef RISCV_SET_ICACHE_FLUSH_CTX
> + # define RISCV_SET_ICACHE_FLUSH_CTX(a, b)   (-EINVAL)
> + #endif
>  +#ifndef PPC_GET_DEXCR_ASPECT
>  +# define PPC_GET_DEXCR_ASPECT(a, b) (-EINVAL)
>  +#endif
>  +#ifndef PPC_SET_DEXCR_ASPECT
>  +# define PPC_SET_DEXCR_ASPECT(a, b, c)  (-EINVAL)
>  +#endif
>   
>   /*
>* this is where the system-wide overflow UID and GID are defined, for


Re: WARNING: CPU: 1 PID: 1 at net/core/netpoll.c:370 netpoll_send_skb+0x1fc/0x20c at boot when netconsole is enabled (kernel v6.9-rc5, v6.8.7, sungem, PowerMac G4 DP)

2024-05-06 Thread Jakub Kicinski
On Tue, 7 May 2024 02:42:58 +0200 Erhard Furtner wrote:
> And indeed without gem_poll_controller() I don't hit the "WARNING: CPU: 1 
> PID: 1 at net/core/netpoll.c:370 netpoll_send_skb+0x1fc/0x20c" and "WARNING: 
> CPU: 1 PID: 1 at kernel/locking/irqflag-debug.c:10 
> warn_bogus_irq_restore+0x30/0x44" or the according lockdep bug at boot!
> 
> Re-booted the machine about 20 times without anything suspicious showing up 
> in the dmesg. With the unpatched kernel I got the WARNING at the 2nd reboot.

Excellent! Do you want to submit that as an official patch?
The explanation is that we can't call disable_irq() from atomic
context (in which netpoll runs). But the callback is no longer
necessary as we can depend on NAPI to do the polling these days.

> What I still get with 'modprobe -v dev_addr_lists_test', even with 
> gem_poll_controller() removed is:
> 
> [...]
> KTAP version 1
> 1..1
> KTAP version 1
> # Subtest: dev-addr-list-test
> # module: dev_addr_lists_test
> 1..6
> 
> 
> WARNING: kunit_try_catch/1770 still has locks held!
> 6.9.0-rc6-PMacG4-dirty #5 Tainted: GWN
> 
> 1 lock held by kunit_try_catch/1770:
>  #0: c0dbfce4 (rtnl_mutex){}-{3:3}, at: dev_addr_test_init+0xbc/0xc8 
> [dev_addr_lists_test]

I think that's fixed in net-next.


Re: WARNING: CPU: 1 PID: 1 at net/core/netpoll.c:370 netpoll_send_skb+0x1fc/0x20c at boot when netconsole is enabled (kernel v6.9-rc5, v6.8.7, sungem, PowerMac G4 DP)

2024-05-06 Thread Erhard Furtner
On Mon, 6 May 2024 07:26:45 -0700
Jakub Kicinski  wrote:

> On Sun, 5 May 2024 23:27:13 +0200 Erhard Furtner wrote:
> > > On Sun, 28 Apr 2024 12:53:06 +0200 Erhard Furtner wrote:
> > > > With netconsole enabled I get this "WARNING: CPU: 1 PID: 1 at
> > > > net/core/netpoll.c:370 netpoll_send_skb+0x1fc/0x20c" and "WARNING:
> > > > CPU: 1 PID: 1 at kernel/locking/irqflag-debug.c:10
> > > > warn_bogus_irq_restore+0x30/0x44" at boot on my PowerMac G4 DP.
> > > > Happens more often than not (6-7 out of 10 times booting):  
> > > 
> > > Could you try with LOCKDEP enabled?
> > > I wonder if irqs_disabled() behaves differently than we expect.
> > 
> > Ok, after a few tries I got a "BUG: spinlock wrong CPU on CPU#0, 
> > swapper/0/1" LOCKDEP hit. But this does not happen every time when I get 
> > the netpoll_send WARNING:  
> 
> Oh, can you try deleting the gem_poll_controller() function?
> Unhook it from ndo_poll_controller and remove it completely.

Ok, this is the resulting diff:

diff --git a/drivers/net/ethernet/sun/sungem.c 
b/drivers/net/ethernet/sun/sungem.c
index 9bd1df8308d2..d3a2fbb14140 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -949,17 +949,6 @@ static irqreturn_t gem_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
 }
 
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void gem_poll_controller(struct net_device *dev)
-{
-   struct gem *gp = netdev_priv(dev);
-
-   disable_irq(gp->pdev->irq);
-   gem_interrupt(gp->pdev->irq, dev);
-   enable_irq(gp->pdev->irq);
-}
-#endif
-
 static void gem_tx_timeout(struct net_device *dev, unsigned int txqueue)
 {
struct gem *gp = netdev_priv(dev);
@@ -2839,9 +2828,6 @@ static const struct net_device_ops gem_netdev_ops = {
.ndo_change_mtu = gem_change_mtu,
.ndo_validate_addr  = eth_validate_addr,
.ndo_set_mac_address= gem_set_mac_address,
-#ifdef CONFIG_NET_POLL_CONTROLLER
-   .ndo_poll_controller= gem_poll_controller,
-#endif
 };
 
 static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)


And indeed without gem_poll_controller() I don't hit the "WARNING: CPU: 1 PID: 
1 at net/core/netpoll.c:370 netpoll_send_skb+0x1fc/0x20c" and "WARNING: CPU: 1 
PID: 1 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x30/0x44" 
or the corresponding lockdep bug at boot!

Re-booted the machine about 20 times without anything suspicious showing up in 
the dmesg. With the unpatched kernel I got the WARNING at the 2nd reboot.

What I still get with 'modprobe -v dev_addr_lists_test', even with 
gem_poll_controller() removed is:

[...]
KTAP version 1
1..1
KTAP version 1
# Subtest: dev-addr-list-test
# module: dev_addr_lists_test
1..6


WARNING: kunit_try_catch/1770 still has locks held!
6.9.0-rc6-PMacG4-dirty #5 Tainted: GWN

1 lock held by kunit_try_catch/1770:
 #0: c0dbfce4 (rtnl_mutex){}-{3:3}, at: dev_addr_test_init+0xbc/0xc8 
[dev_addr_lists_test]

stack backtrace:
CPU: 0 PID: 1770 Comm: kunit_try_catch Tainted: GWN 
6.9.0-rc6-PMacG4-dirty #5
Hardware name: PowerMac3,6 7455 0x80010303 PowerMac
Call Trace:
[f3749ef0] [c07c2bec] dump_stack_lvl+0x80/0xac (unreliable)
[f3749f10] [c004fe64] do_exit+0x5b4/0x834
[f3749f60] [c006d848] kthread_complete_and_exit+0x0/0x28
[f3749f80] [c006d870] kthread+0x0/0xe8
[f3749fa0] [bebf0cf4] kunit_try_catch_run+0x0/0x15c [kunit]
[f3749fc0] [c006d954] kthread+0xe4/0xe8
[f3749ff0] [c0015304] start_kernel_thread+0x10/0x14
ok 1 dev_addr_test_basic
ok 2 dev_addr_test_sync_one
ok 3 dev_addr_test_add_del
ok 4 dev_addr_test_del_main
ok 5 dev_addr_test_add_set
ok 6 dev_addr_test_add_excl
# dev-addr-list-test: pass:6 fail:0 skip:0 total:6
# Totals: pass:6 fail:0 skip:0 total:6
ok 1 dev-addr-list-test
[...]

Regards,
Erhard


linux-next: manual merge of the risc-v tree with the powerpc tree

2024-05-06 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the risc-v tree got conflicts in:

  include/uapi/linux/prctl.h
  kernel/sys.c

between commit:

  628d701f2de5 ("powerpc/dexcr: Add DEXCR prctl interface")

from the powerpc tree and commit:

  6b9391b581fd ("riscv: Include riscv_set_icache_flush_ctx prctl")

from the risc-v tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc include/uapi/linux/prctl.h
index 713d28788df7,524d546d697b..
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@@ -306,20 -306,10 +306,26 @@@ struct prctl_mm_map 
  # define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
  # define PR_RISCV_V_VSTATE_CTRL_MASK  0x1f
  
+ #define PR_RISCV_SET_ICACHE_FLUSH_CTX 71
+ # define PR_RISCV_CTX_SW_FENCEI_ON0
+ # define PR_RISCV_CTX_SW_FENCEI_OFF   1
+ # define PR_RISCV_SCOPE_PER_PROCESS   0
+ # define PR_RISCV_SCOPE_PER_THREAD1
+ 
 +/* PowerPC Dynamic Execution Control Register (DEXCR) controls */
 +#define PR_PPC_GET_DEXCR  72
 +#define PR_PPC_SET_DEXCR  73
 +/* DEXCR aspect to act on */
 +# define PR_PPC_DEXCR_SBHE0 /* Speculative branch hint enable */
 +# define PR_PPC_DEXCR_IBRTPD  1 /* Indirect branch recurrent target 
prediction disable */
 +# define PR_PPC_DEXCR_SRAPD   2 /* Subroutine return address 
prediction disable */
 +# define PR_PPC_DEXCR_NPHIE   3 /* Non-privileged hash instruction 
enable */
 +/* Action to apply / return */
 +# define PR_PPC_DEXCR_CTRL_EDITABLE0x1 /* Aspect can be modified with 
PR_PPC_SET_DEXCR */
 +# define PR_PPC_DEXCR_CTRL_SET 0x2 /* Set the aspect for this 
process */
 +# define PR_PPC_DEXCR_CTRL_CLEAR   0x4 /* Clear the aspect for this 
process */
 +# define PR_PPC_DEXCR_CTRL_SET_ONEXEC  0x8 /* Set the aspect on exec */
 +# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC   0x10 /* Clear the aspect on 
exec */
 +# define PR_PPC_DEXCR_CTRL_MASK   0x1f
 +
  #endif /* _LINUX_PRCTL_H */
diff --cc kernel/sys.c
index f9c95410278c,1b7bda0722ca..
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@@ -146,12 -146,9 +146,15 @@@
  #ifndef RISCV_V_GET_CONTROL
  # define RISCV_V_GET_CONTROL()(-EINVAL)
  #endif
+ #ifndef RISCV_SET_ICACHE_FLUSH_CTX
+ # define RISCV_SET_ICACHE_FLUSH_CTX(a, b) (-EINVAL)
+ #endif
 +#ifndef PPC_GET_DEXCR_ASPECT
 +# define PPC_GET_DEXCR_ASPECT(a, b)   (-EINVAL)
 +#endif
 +#ifndef PPC_SET_DEXCR_ASPECT
 +# define PPC_SET_DEXCR_ASPECT(a, b, c)(-EINVAL)
 +#endif
  
  /*
   * this is where the system-wide overflow UID and GID are defined, for


pgpA3BQRq0C_8.pgp
Description: OpenPGP digital signature


Re: [PATCH v3] kprobe/ftrace: bail out if ftrace was killed

2024-05-06 Thread Steven Rostedt
On Mon, 6 May 2024 14:46:57 +
Christophe Leroy  wrote:

> Isn't it safer to provide a function rather than a direct access to a 
> variable ?
> 
> By the way, wouldn't it be more performant to use a static branch (jump 
> label) ?

A static branch could work, but the point of this is that if ftrace
failed, it was likely due to an issue with text modification. Do we want to
stop it via text modification?

-- Steve


Re: [PATCH 0/7] Remove 40x

2024-05-06 Thread Rob Herring (Arm)


On Mon, 06 May 2024 22:51:45 +1000, Michael Ellerman wrote:
> The 40x platforms & CPUs are old and have been unmaintained for years, and as
> far as we can tell have no users.
> 
> Note 44x and 476 are not affected.
> 
> Christophe first proposed removing 40x in 2020:
>   
> https://lore.kernel.org/linuxppc-dev/40899eb1b8f10da3706acd06c3d46d2418c8886e.1585635837.git.christophe.le...@c-s.fr/
> 
> And on a few subsequent occasions.
> 
> The proposed removal was covered on Phoronix:
>   https://www.phoronix.com/news/PowerPC-40x-400-Linux-Removal
> 
> OpenWRT dropped support in 2019.
> 
> So unless anyone steps up to maintain it, remove 40x as unused. RIP.
> 
> cheers
> 
> 
> More context: https://github.com/linuxppc/issues/issues/349
> 
> Christophe Leroy (4):
>   powerpc/40x: Remove 40x platforms.
>   powerpc/boot: Remove all 40x platforms from boot
>   powerpc: Remove core support for 40x
>   powerpc/platforms: Move files from 4xx to 44x
> 
> Michael Ellerman (3):
>   powerpc: Remove 40x from Kconfig and defconfig
>   powerpc/4xx: Remove CONFIG_BOOKE_OR_40x
>   powerpc: Replace CONFIG_4xx with CONFIG_44x
> 
>  MAINTAINERS   |   1 -
>  arch/powerpc/Kconfig  |  17 +-
>  arch/powerpc/Kconfig.debug|  13 -
>  arch/powerpc/Makefile |   5 -
>  arch/powerpc/boot/4xx.c   | 266 ---
>  arch/powerpc/boot/4xx.h   |   4 -
>  arch/powerpc/boot/Makefile|  11 -
>  arch/powerpc/boot/cuboot-acadia.c | 171 -
>  arch/powerpc/boot/cuboot-hotfoot.c| 139 
>  arch/powerpc/boot/cuboot-kilauea.c|  46 --
>  arch/powerpc/boot/dcr.h   |  11 -
>  arch/powerpc/boot/dts/acadia.dts  | 224 --
>  arch/powerpc/boot/dts/hotfoot.dts | 296 ---
>  arch/powerpc/boot/dts/kilauea.dts | 407 --
>  arch/powerpc/boot/dts/obs600.dts  | 314 
>  arch/powerpc/boot/ppcboot-hotfoot.h   | 119 ---
>  arch/powerpc/boot/ppcboot.h   |   2 +-
>  arch/powerpc/configs/40x.config   |   2 -
>  arch/powerpc/configs/40x/acadia_defconfig |  61 --
>  arch/powerpc/configs/40x/kilauea_defconfig|  69 --
>  arch/powerpc/configs/40x/klondike_defconfig   |  43 --
>  arch/powerpc/configs/40x/makalu_defconfig |  59 --
>  arch/powerpc/configs/40x/obs600_defconfig |  69 --
>  arch/powerpc/configs/40x/walnut_defconfig |  55 --
>  arch/powerpc/configs/ppc40x_defconfig |  74 --
>  arch/powerpc/include/asm/cacheflush.h |   2 +-
>  arch/powerpc/include/asm/cputable.h   |   7 -
>  arch/powerpc/include/asm/hw_irq.h |   8 +-
>  arch/powerpc/include/asm/irq.h|   2 +-
>  arch/powerpc/include/asm/kup.h|   2 +-
>  arch/powerpc/include/asm/mmu.h|   7 -
>  arch/powerpc/include/asm/nohash/32/mmu-40x.h  |  68 --
>  arch/powerpc/include/asm/nohash/32/pgtable.h  |   4 +-
>  arch/powerpc/include/asm/nohash/32/pte-40x.h  |  73 --
>  arch/powerpc/include/asm/nohash/mmu.h |   5 +-
>  arch/powerpc/include/asm/ppc_asm.h|   2 +-
>  arch/powerpc/include/asm/processor.h  |   2 +-
>  arch/powerpc/include/asm/ptrace.h |   2 +-
>  arch/powerpc/include/asm/reg.h|  27 +-
>  arch/powerpc/include/asm/reg_booke.h  | 113 +--
>  arch/powerpc/include/asm/time.h   |   7 +-
>  arch/powerpc/include/asm/udbg.h   |   1 -
>  arch/powerpc/kernel/Makefile  |   1 -
>  arch/powerpc/kernel/asm-offsets.c |   2 +-
>  arch/powerpc/kernel/cpu_specs.h   |   4 -
>  arch/powerpc/kernel/cpu_specs_40x.h   | 280 ---
>  arch/powerpc/kernel/entry_32.S|  48 +-
>  arch/powerpc/kernel/epapr_hcalls.S|   2 +-
>  arch/powerpc/kernel/head_32.h |  12 +-
>  arch/powerpc/kernel/head_40x.S| 721 --
>  arch/powerpc/kernel/head_booke.h  |   3 +-
>  arch/powerpc/kernel/irq.c |   2 +-
>  arch/powerpc/kernel/kgdb.c|   4 +-
>  arch/powerpc/kernel/misc_32.S |  40 -
>  arch/powerpc/kernel/process.c |   4 +-
>  arch/powerpc/kernel/setup.h   |   2 +-
>  arch/powerpc/kernel/setup_32.c|   2 +-
>  arch/powerpc/kernel/time.c|   2 +-
>  arch/powerpc/kernel/traps.c   |   2 +-
>  arch/powerpc/kernel/udbg.c|   3 -
>  arch/powerpc/kernel/udbg_16550.c  |  23 -
>  arch/powerpc/mm/fault.c   |   4 +-
>  arch/powerpc/mm/mmu_context.c |   2 +-
>  arch/powerpc/mm/mmu_decl.h|   8 +-
>  arch/powerpc/mm/nohash/40x.c  | 161 
>  arch/powerpc/mm/nohash/Makefile   |   1 -
>  a

Re: [PATCH v8 13/17] x86/ftrace: enable dynamic ftrace without CONFIG_MODULES

2024-05-06 Thread Steven Rostedt
On Sun,  5 May 2024 17:25:56 +0300
Mike Rapoport  wrote:

> From: "Mike Rapoport (IBM)" 
> 
> Dynamic ftrace must allocate memory for code and this was impossible
> without CONFIG_MODULES.
> 
> With execmem separated from the modules code, execmem_text_alloc() is
> available regardless of CONFIG_MODULES.
> 
> Remove dependency of dynamic ftrace on CONFIG_MODULES and make
> CONFIG_DYNAMIC_FTRACE select CONFIG_EXECMEM in Kconfig.
> 
> Signed-off-by: Mike Rapoport (IBM) 
> ---
>  arch/x86/Kconfig |  1 +
>  arch/x86/kernel/ftrace.c | 10 --
>  2 files changed, 1 insertion(+), 10 deletions(-)

Reviewed-by: Steven Rostedt (Google) 

-- Steve


Re: [PATCH v8 06/17] mm: introduce execmem_alloc() and execmem_free()

2024-05-06 Thread Steven Rostedt
On Sun,  5 May 2024 17:25:49 +0300
Mike Rapoport  wrote:

> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 70139d9d2e01..c8ddb7abda7c 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -25,6 +25,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #include 
>  
> @@ -261,15 +262,14 @@ void arch_ftrace_update_code(int command)
>  #ifdef CONFIG_X86_64
>  
>  #ifdef CONFIG_MODULES
> -#include 
>  /* Module allocation simplifies allocating memory for code */
>  static inline void *alloc_tramp(unsigned long size)
>  {
> - return module_alloc(size);
> + return execmem_alloc(EXECMEM_FTRACE, size);
>  }
>  static inline void tramp_free(void *tramp)
>  {
> - module_memfree(tramp);
> + execmem_free(tramp);
>  }
>  #else
>  /* Trampolines can only be created if modules are supported */
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c

Acked-by: Steven Rostedt (Google) 

-- Steve


Re: [RFC PATCH v2 0/6] powerpc: pSeries: vfio: iommu: Re-enable support for SPAPR TCE VFIO

2024-05-06 Thread Jason Gunthorpe
On Sat, May 04, 2024 at 12:33:53AM +0530, Shivaprasad G Bhat wrote:
> We have legacy workloads using VFIO in userspace/kvm guests running
> on downstream distro kernels. We want these workloads to be able to
> continue running on our arch.

It has been broken since 2018, I don't find this reasoning entirely
reasonable :\

> I firmly believe the refactoring in this patch series is a step in
> that direction.

But fine, as long as we are going to fix it. PPC really needs this to
be resolved to keep working.

Jason


Re: [RFC PATCH 1/2] objtool: Run objtool only if either of the config options are selected

2024-05-06 Thread Masahiro Yamada
On Tue, Apr 23, 2024 at 1:19 AM Sathvika Vasireddy
 wrote:
>
> Hi Masahiro, thanks for reviewing.
>
> On 4/22/24 5:39 PM, Masahiro Yamada wrote:
>
> On Mon, Apr 22, 2024 at 6:25 PM Sathvika Vasireddy  wrote:
>
> Currently, when objtool is enabled and none of the supported options
> are triggered, kernel build errors out with the below error:
> error: objtool: At least one command required.
>
> Then, I think CONFIG_OBJTOOL should be disabled.
>
> A subsequent patch introduces --ftr-fixup as an option to objtool to do 
> feature fixup at build-time via CONFIG_HAVE_OBJTOOL_FTR_FIXUP option. If 
> CONFIG_OBJTOOL is not selected, then objtool cannot be used to pass 
> --ftr-fixup option.
>
> In cases where none of the supported options (like --mcount on powerpc for 
> example) is triggered, but still require --ftr-fixup option to be passed to 
> objtool, we see "error: objtool: At least one command required" errors. So, 
> to address this, run objtool only when either of the config options is selected.
>
> Thanks,
> Sathvika



Same as my first comment.


Bad things happen because you select OBJTOOL.

Preferably, this should be a separate program
as in the first draft, but if you insist on
integrating it into objtool, I recommend keeping
CONFIG_OBJTOOL and CONFIG_HAVE_OBJTOOL_FTR_FIXUP
as separate, unrelated options.


I attach a fix-up patch applicable on top of your work.




-- 
Best Regards
Masahiro Yamada
diff --git a/Makefile b/Makefile
index 40fb2ca6fe4c..c5ac01274893 100644
--- a/Makefile
+++ b/Makefile
@@ -1327,6 +1327,13 @@ ifdef CONFIG_OBJTOOL
 prepare: tools/objtool
 endif
 
+# CONFIG_OBJTOOL and CONFIG_HAVE_OBJTOOL_FTR_FIXUP are unrelated, separate
+# options. It was integrated in objtool in order to borrow the elf parser,
+# but this is different from how the other objtool commands are used.
+ifdef CONFIG_HAVE_OBJTOOL_FTR_FIXUP
+prepare: tools/objtool
+endif
+
 ifdef CONFIG_BPF
 ifdef CONFIG_DEBUG_INFO_BTF
 prepare: tools/bpf/resolve_btfids
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 806285a28231..564b73cbfa3d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -26,7 +26,7 @@ config 64BIT
 config HAVE_OBJTOOL_FTR_FIXUP
 bool
 default y if CPU_LITTLE_ENDIAN && PPC64
-select OBJTOOL
+	# HAVE_OBJTOOL_FTR_FIXUP must not select OBJTOOL
 
 config LIVEPATCH_64
 	def_bool PPC64
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 8fff27b9bdcb..855ad097f85e 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -257,10 +257,10 @@ dtc_cpp_flags  = -Wp,-MMD,$(depfile).pre.tmp -nostdinc\
 		 $(addprefix -I,$(DTC_INCLUDE))  \
 		 -undef -D__DTS__
 
-ifdef CONFIG_OBJTOOL
-
 objtool := $(objtree)/tools/objtool/objtool
 
+ifdef CONFIG_OBJTOOL
+
 objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK)		+= --hacks=jump_label
 objtool-args-$(CONFIG_HAVE_NOINSTR_HACK)		+= --hacks=noinstr
 objtool-args-$(CONFIG_MITIGATION_CALL_DEPTH_TRACKING)	+= --hacks=skylake
@@ -286,16 +286,7 @@ objtool-args = $(objtool-args-y)	\
 
 delay-objtool := $(or $(CONFIG_LTO_CLANG),$(CONFIG_X86_KERNEL_IBT))
 
-ifneq ($(objtool-args-y),)
 cmd_objtool = $(if $(objtool-enabled), ; $(objtool) $(objtool-args) $@)
-endif
-
-cmd_objtool_vmlinux :=
-ifeq ($(CONFIG_HAVE_OBJTOOL_FTR_FIXUP),y)
-cmd_objtool_vmlinux = $(if $(objtool-enabled), ; $(objtool) $(objtool-args) $@)
-vmlinux:
-$(cmd_objtool_vmlinux)
-endif
 
 cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(objtool))' ; } >> $(dot-target).cmd)
 
diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux
index 2f4a7154e676..f02f99c6f355 100644
--- a/scripts/Makefile.vmlinux
+++ b/scripts/Makefile.vmlinux
@@ -58,10 +58,9 @@ existing-targets := $(wildcard $(sort $(targets)))
 # --
 #
 #  For feature fixup, objtool does not run on individual
-#  translation units. Run this on vmlinux instead.
+#  translation units. Run this on vmlinux instead. Only for PowerPC.
+#  The other objtool commands work on individual objects or vmlinux.o.
 
-objtool-enabled := $(CONFIG_HAVE_OBJTOOL_FTR_FIXUP)
-
-vmlinux-objtool-args-$(CONFIG_HAVE_OBJTOOL_FTR_FIXUP)  += --ftr-fixup
-
-objtool-args = $(vmlinux-objtool-args-y) --link
+ifdef CONFIG_HAVE_OBJTOOL_FTR_FIXUP
+cmd_objtool_vmlinux = ; $(objtool) --ftr-fixup --link $@
+endif


Re: [RFC PATCH 2/2] objtool/powerpc: Enhance objtool to fixup alternate feature relative addresses

2024-05-06 Thread Masahiro Yamada
On Mon, Apr 22, 2024 at 6:25 PM Sathvika Vasireddy  wrote:
>
> Implement build-time fixup of alternate feature relative addresses for
> the out-of-line (else) patch code. Initial posting to achieve the same
> using another tool can be found at [1]. Idea is to implement this using
> objtool instead of introducing another tool since it already has elf
> parsing and processing covered.
>
> Introduce --ftr-fixup as an option to objtool to do feature fixup at
> build-time.
>
> Couple of issues and warnings encountered while implementing feature
> fixup using objtool are as follows:
>
> 1. libelf is creating corrupted vmlinux file after writing necessary
> changes to the file. Due to this, kexec is not able to load new
> kernel.
>
> It gives the following error:
> ELF Note corrupted !
> Cannot determine the file type of vmlinux
>
> To fix this issue, after opening vmlinux file, make a call to
> elf_flagelf (e, ELF_C_SET, ELF_F_LAYOUT). This instructs libelf not
> to touch the segment and section layout. It informs the library
> that the application will take responsibility for the layout of the
> file and that the library should not insert any padding between
> sections.
>
> 2. Fix can't find starting instruction warnings when run on vmlinux
>
> Objtool throws a lot of can't find starting instruction warnings
> when run on vmlinux with --ftr-fixup option.
>
> These warnings are seen because find_insn() function looks for
> instructions at offsets that are relative to the start of the section.
> In case of individual object files (.o), there are no can't find
> starting instruction warnings seen because the actual offset
> associated with an instruction is itself a relative offset since the
> sections start at offset 0x0.
>
> However, in case of vmlinux, find_insn() function fails to find
> instructions at the actual offset associated with an instruction
> since the sections in vmlinux do not start at offset 0x0. Due to
> this, find_insn() will look for absolute offset and not the relative
> offset. This is resulting in a lot of can't find starting instruction
> warnings when objtool is run on vmlinux.
>
> To fix this, pass offset that is relative to the start of the section
> to find_insn().
>
> find_insn() is also looking for symbols of size 0. But, objtool does
> not store empty STT_NOTYPE symbols in the rbtree. Due to this,
> for empty symbols, objtool is throwing can't find starting
> instruction warnings. Fix this by ignoring symbols that are of
> size 0 since objtool does not add them to the rbtree.
>
> 3. Objtool is throwing unannotated intra-function call warnings
> when run on vmlinux with --ftr-fixup option.
>
> One such example:
>
> vmlinux: warning: objtool: .text+0x3d94:
> unannotated intra-function call
>
> .text + 0x3d94 = c0008000 + 3d94 = c00081d4
>
> c00081d4: 45 24 02 48  bl c002a618
> 
>
> c002a610 :
> c002a610:   0e 01 4c 3c addis   r2,r12,270
> c002a610: R_PPC64_REL16_HA.TOC.
> c002a614:   f0 6c 42 38 addir2,r2,27888
> c002a614: R_PPC64_REL16_LO.TOC.+0x4
> c002a618:   a6 02 08 7c mflrr0
>
> This is happening because we should be looking for destination
> symbols that are at absolute offsets instead of relative offsets.
> After fixing dest_off to point to absolute offset, there are still
> a lot of these warnings shown.
>
> In the above example, objtool is computing the destination
> offset to be c002a618, which points to a completely
> different instruction. find_call_destination() is looking for this
> offset and failing. Instead, we should be looking for destination
> offset c002a610 which points to system_reset_exception
> function.
>
> Even after fixing the way destination offset is computed, and
> after looking for dest_off - 0x8 in cases where the original offset
> is not found, there are still a lot of unannotated intra-function
> call warnings generated. This is due to symbols that are not
> properly annotated.
>
> So, for now, as a hack to curb these warnings, do not emit
> unannotated intra-function call warnings when objtool is run
> with --ftr-fixup option.
>
> TODO:
> This patch enables build time feature fixup only for powerpc little
> endian configs. There are boot failures with big endian configs.
> Posting this as an initial RFC to get some review comments while I work
> on big endian issues.
>
> [1]
> https://lore.kernel.org/linuxppc-dev/20170521010130.13552-1-npig...@gmail.com/
>
> Co-developed-by: Nicholas Piggin 
> Signed-off-by: Nicholas Piggin 
> Signed-off-by: Sathvika Vasireddy 
> ---
>  arch/Kconfig  |   3 +
>  arch/powerpc/Kconfig  |   5 +
>  arch/powerpc/Makefile |   5 +
>  arch/powerpc/include/asm/feature-fixups.h |  11 +-
>  arch/powerpc/kernel/vmlinux.lds.S |  14 +-

[PATCH 3/3] arch/powerpc/kvm: Reduce lock contention by moving spinlock from ics to irq_state

2024-05-06 Thread Gautam Menghani
Take a spinlock on state of an IRQ instead of an entire ICS. This
improves scalability by reducing contention.

Signed-off-by: Gautam Menghani 
---
 arch/powerpc/kvm/book3s_hv_rm_xics.c |  8 ++---
 arch/powerpc/kvm/book3s_xics.c   | 44 
 arch/powerpc/kvm/book3s_xics.h   |  2 +-
 3 files changed, 23 insertions(+), 31 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c 
b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index e42984878503..178bc869b519 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -308,7 +308,7 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, 
struct kvmppc_icp *icp,
state = &ics->irq_state[src];
 
/* Get a lock on the ICS */
-   arch_spin_lock(&ics->lock);
+   arch_spin_lock(&state->lock);
 
/* Get our server */
if (!icp || state->server != icp->server_num) {
@@ -368,7 +368,7 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, 
struct kvmppc_icp *icp,
 * Delivery was successful, did we reject somebody else ?
 */
if (reject && reject != XICS_IPI) {
-   arch_spin_unlock(&ics->lock);
+   arch_spin_unlock(&state->lock);
icp->n_reject++;
new_irq = reject;
check_resend = 0;
@@ -397,13 +397,13 @@ static void icp_rm_deliver_irq(struct kvmppc_xics *xics, 
struct kvmppc_icp *icp,
smp_mb();
if (!icp->state.need_resend) {
state->resend = 0;
-   arch_spin_unlock(&ics->lock);
+   arch_spin_unlock(&state->lock);
check_resend = 0;
goto again;
}
}
  out:
-   arch_spin_unlock(&ics->lock);
+   arch_spin_unlock(&state->lock);
 }
 
 static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 1dc2f77571e7..466c92cf49fb 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -36,21 +36,13 @@
  * LOCKING
  * ===
  *
- * Each ICS has a spin lock protecting the information about the IRQ
- * sources and avoiding simultaneous deliveries of the same interrupt.
+ * Each IRQ has a spin lock protecting its state sources and avoiding
+ * simultaneous deliveries of the same interrupt.
  *
  * ICP operations are done via a single compare & swap transaction
  * (most ICP state fits in the union kvmppc_icp_state)
  */
 
-/*
- * TODO
- * 
- *
- * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
- *   locks array to improve scalability
- */
-
 /* -- ICS routines -- */
 
 static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
@@ -142,7 +134,7 @@ static bool write_xive(struct kvmppc_xics *xics, struct 
kvmppc_ics *ics,
unsigned long flags;
 
local_irq_save(flags);
-   arch_spin_lock(&ics->lock);
+   arch_spin_lock(&state->lock);
 
state->server = server;
state->priority = priority;
@@ -154,7 +146,7 @@ static bool write_xive(struct kvmppc_xics *xics, struct 
kvmppc_ics *ics,
deliver = true;
}
 
-   arch_spin_unlock(&ics->lock);
+   arch_spin_unlock(&state->lock);
local_irq_restore(flags);
 
return deliver;
@@ -207,10 +199,10 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 
*server, u32 *priority)
state = &ics->irq_state[src];
 
local_irq_save(flags);
-   arch_spin_lock(&ics->lock);
+   arch_spin_lock(&state->lock);
*server = state->server;
*priority = state->priority;
-   arch_spin_unlock(&ics->lock);
+   arch_spin_unlock(&state->lock);
local_irq_restore(flags);
 
return 0;
@@ -406,7 +398,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, 
struct kvmppc_icp *icp,
 
/* Get a lock on the ICS */
local_irq_save(flags);
-   arch_spin_lock(&ics->lock);
+   arch_spin_lock(&state->lock);
 
/* Get our server */
if (!icp || state->server != icp->server_num) {
@@ -467,7 +459,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, 
struct kvmppc_icp *icp,
 * Delivery was successful, did we reject somebody else ?
 */
if (reject && reject != XICS_IPI) {
-   arch_spin_unlock(&ics->lock);
+   arch_spin_unlock(&state->lock);
local_irq_restore(flags);
new_irq = reject;
check_resend = false;
@@ -497,14 +489,14 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, 
struct kvmppc_icp *icp,
smp_mb();
if (!icp->state.need_resend) {
state->resend = 0;
-   arch

[PATCH 2/3] arch/powerpc/kvm: Optimize the server number -> ICP lookup

2024-05-06 Thread Gautam Menghani
Given a server number, kvmppc_xics_find_server() does a linear search
over the vcpus of a VM. Optimize this logic by using an array to
maintain the mapping between server number -> icp.

Signed-off-by: Gautam Menghani 
---
 arch/powerpc/kvm/book3s_xics.c |  4 ++--
 arch/powerpc/kvm/book3s_xics.h | 10 ++
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 12de526f04c4..1dc2f77571e7 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -47,8 +47,6 @@
  * TODO
  * 
  *
- * - Speed up server# -> ICP lookup (array ? hash table ?)
- *
  * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
  *   locks array to improve scalability
  */
@@ -1062,6 +1060,7 @@ static struct kvmppc_ics *kvmppc_xics_create_ics(struct 
kvm *kvm,
 static int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long 
server_num)
 {
struct kvmppc_icp *icp;
+   struct kvm *kvm = vcpu->kvm;
 
if (!vcpu->kvm->arch.xics)
return -ENODEV;
@@ -1078,6 +1077,7 @@ static int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, 
unsigned long server_nu
icp->state.mfrr = MASKED;
icp->state.pending_pri = MASKED;
vcpu->arch.icp = icp;
+   kvm->arch.xics->icps[server_num] = icp;
 
XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
 
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index 8fcb34ea47a4..feeb0897d555 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -111,19 +111,13 @@ struct kvmppc_xics {
u32 err_noics;
u32 err_noicp;
struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
+   DECLARE_FLEX_ARRAY(struct kvmppc_icp *, icps);
 };
 
 static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
 u32 nr)
 {
-   struct kvm_vcpu *vcpu = NULL;
-   unsigned long i;
-
-   kvm_for_each_vcpu(i, vcpu, kvm) {
-   if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
-   return vcpu->arch.icp;
-   }
-   return NULL;
+   return kvm->arch.xics->icps[nr];
 }
 
 static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
-- 
2.44.0



[PATCH 1/3] arch/powerpc/kvm: Use bitmap to speed up resend of irqs in ICS

2024-05-06 Thread Gautam Menghani
When an irq is to be resent, all 1024 irqs in an ICS are scanned and the
irqs having 'resend' flag set are resent. Optimize this flow using bitmap
array to speed up the resends.

Signed-off-by: Gautam Menghani 
---
 arch/powerpc/kvm/book3s_xics.c | 22 +++---
 arch/powerpc/kvm/book3s_xics.h |  1 +
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index 589a8f257120..12de526f04c4 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -47,9 +47,6 @@
  * TODO
  * 
  *
- * - To speed up resends, keep a bitmap of "resend" set bits in the
- *   ICS
- *
  * - Speed up server# -> ICP lookup (array ? hash table ?)
  *
  * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
@@ -125,15 +122,17 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 
irq, u32 level)
 static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
 struct kvmppc_icp *icp)
 {
-   int i;
+   u32 irq;
+   struct ics_irq_state *state;
 
-   for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
-   struct ics_irq_state *state = &ics->irq_state[i];
-   if (state->resend) {
-   XICS_DBG("resend %#x prio %#x\n", state->number,
- state->priority);
-   icp_deliver_irq(xics, icp, state->number, true);
-   }
+   for_each_set_bit(irq, ics->resend_map, KVMPPC_XICS_IRQ_PER_ICS) {
+   state = &ics->irq_state[irq];
+
+   if (!test_and_clear_bit(irq, ics->resend_map))
+   continue;
+   if (!state)
+   continue;
+   icp_deliver_irq(xics, icp, state->number, true);
}
 }
 
@@ -489,6 +488,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, 
struct kvmppc_icp *icp,
 */
smp_wmb();
set_bit(ics->icsid, icp->resend_map);
+   set_bit(src, ics->resend_map);
 
/*
 * If the need_resend flag got cleared in the ICP some time
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index 08fb0843faf5..8fcb34ea47a4 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -98,6 +98,7 @@ struct kvmppc_ics {
arch_spinlock_t lock;
u16 icsid;
struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
+   DECLARE_BITMAP(resend_map, KVMPPC_XICS_IRQ_PER_ICS);
 };
 
 struct kvmppc_xics {
-- 
2.44.0



[PATCH 0/3] XICS emulation optimizations in KVM for PPC

2024-05-06 Thread Gautam Menghani
Optimize the XICS emulation code in KVM as per the 'performance todos'
in the comments of book3s_xics.c. 

Performance numbers:
1. Test case: Pgbench run in a KVM on PowerVM guest for 120 secs


2. Time taken by arch_send_call_function_single_ipi() currently measured 
with funclatency [1].

$ ./funclatency.py -u arch_send_call_function_single_ipi

usecs   : count distribution
0 -> 1  : 7||
2 -> 3  : 16   ||
4 -> 7  : 141  ||
8 -> 15 : 4455631  ||
16 -> 31 : 437981   |*** |
32 -> 63 : 5036 ||
64 -> 127: 92   ||

avg = 12 usecs, total: 60,532,481 usecs, count: 4,898,904


3. Time taken by arch_send_call_function_single_ipi() with changes in
this series.

$ ./funclatency.py -u arch_send_call_function_single_ipi

usecs   : count distribution
0 -> 1  : 15   ||
2 -> 3  : 7||
4 -> 7  : 3798 ||
8 -> 15 : 4569610  ||
16 -> 31 : 339284   |**  |
32 -> 63 : 4542 ||
64 -> 127: 68   ||
128 -> 255: 0||
256 -> 511: 1||

avg = 11 usecs, total: 57,720,612 usecs, count: 4,917,325

4. This patch series has also been tested on KVM on a Power8 CPU.

[1]: https://github.com/iovisor/bcc/blob/master/tools/funclatency.py

Gautam Menghani (3):
  arch/powerpc/kvm: Use bitmap to speed up resend of irqs in ICS
  arch/powerpc/kvm: Optimize the server number -> ICP lookup
  arch/powerpc/kvm: Reduce lock contention by moving spinlock from ics
to irq_state

 arch/powerpc/kvm/book3s_hv_rm_xics.c |  8 ++--
 arch/powerpc/kvm/book3s_xics.c   | 70 
 arch/powerpc/kvm/book3s_xics.h   | 13 ++
 3 files changed, 39 insertions(+), 52 deletions(-)

-- 
2.44.0



[PATCH v6] arch/powerpc/kvm: Add support for reading VPA counters for pseries guests

2024-05-06 Thread Gautam Menghani
PAPR hypervisor has introduced three new counters in the VPA area of
LPAR CPUs for KVM L2 guest (see [1] for terminology) observability - 2
for context switches from host to guest and vice versa, and 1 counter
for getting the total time spent inside the KVM guest. Add a tracepoint
that enables reading the counters for use by ftrace/perf. Note that this
tracepoint is only available for nestedv2 API (i.e, KVM on PowerVM).

[1] Terminology:
a. L1 refers to the VM (LPAR) booted on top of PAPR hypervisor
b. L2 refers to the KVM guest booted on top of L1.

Signed-off-by: Vaibhav Jain 
Signed-off-by: Gautam Menghani 
---
v5 -> v6:
1. Use TRACE_EVENT_FN to enable/disable counters only once.
2. Remove the agg. counters from vcpu->arch.
3. Use PACA to maintain old counter values instead of zeroing on every
entry.
4. Simplify variable names

v4 -> v5:
1. Define helper functions for getting/setting the accumulation counter
in L2's VPA

v3 -> v4:
1. After vcpu_run, check the VPA flag instead of checking for tracepoint
being enabled for disabling the cs time accumulation.

v2 -> v3:
1. Move the counter disabling and zeroing code to a different function.
2. Move the get_lppaca() inside the tracepoint_enabled() branch.
3. Add the aggregation logic to maintain total context switch time.

v1 -> v2:
1. Fix the build error due to invalid struct member reference.

 arch/powerpc/include/asm/lppaca.h | 11 +--
 arch/powerpc/include/asm/paca.h   |  5 +++
 arch/powerpc/kvm/book3s_hv.c  | 52 +++
 arch/powerpc/kvm/trace_hv.h   | 27 
 4 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/lppaca.h 
b/arch/powerpc/include/asm/lppaca.h
index 61ec2447dabf..f40a646bee3c 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -62,7 +62,8 @@ struct lppaca {
u8  donate_dedicated_cpu;   /* Donate dedicated CPU cycles */
u8  fpregs_in_use;
u8  pmcregs_in_use;
-   u8  reserved8[28];
+   u8  l2_counters_enable;  /* Enable usage of counters for KVM guest 
*/
+   u8  reserved8[27];
__be64  wait_state_cycles;  /* Wait cycles for this proc */
u8  reserved9[28];
__be16  slb_count;  /* # of SLBs to maintain */
@@ -92,9 +93,13 @@ struct lppaca {
/* cacheline 4-5 */
 
__be32  page_ins;   /* CMO Hint - # page ins by OS */
-   u8  reserved12[148];
+   u8  reserved12[28];
+   volatile __be64 l1_to_l2_cs_tb;
+   volatile __be64 l2_to_l1_cs_tb;
+   volatile __be64 l2_runtime_tb;
+   u8 reserved13[96];
volatile __be64 dtl_idx;/* Dispatch Trace Log head index */
-   u8  reserved13[96];
+   u8  reserved14[96];
 } cacheline_aligned;
 
 #define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr)
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1d58da946739..f20ac7a6efa4 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -278,6 +278,11 @@ struct paca_struct {
struct mce_info *mce_info;
u8 mce_pending_irq_work;
 #endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+   u64 l1_to_l2_cs;
+   u64 l2_to_l1_cs;
+   u64 l2_runtime_agg;
+#endif
 } cacheline_aligned;
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8e86eb577eb8..ed69ad58bd02 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4108,6 +4108,54 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu 
*vcpu)
}
 }
 
+static inline int kvmhv_get_l2_counters_status(void)
+{
+   return get_lppaca()->l2_counters_enable;
+}
+
+static inline void kvmhv_set_l2_counters_status(int cpu, bool status)
+{
+   if (status)
+   lppaca_of(cpu).l2_counters_enable = 1;
+   else
+   lppaca_of(cpu).l2_counters_enable = 0;
+}
+
+int kmvhv_counters_tracepoint_regfunc(void)
+{
+   int cpu;
+
+   for_each_possible_cpu(cpu) {
+   kvmhv_set_l2_counters_status(cpu, true);
+   }
+   return 0;
+}
+
+void kmvhv_counters_tracepoint_unregfunc(void)
+{
+   int cpu;
+
+   for_each_possible_cpu(cpu) {
+   kvmhv_set_l2_counters_status(cpu, false);
+   }
+}
+
+static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
+{
+   struct lppaca *lp = get_lppaca();
+   u64 l1_to_l2_ns, l2_to_l1_ns, l2_runtime_ns;
+
+   l1_to_l2_ns = tb_to_ns(be64_to_cpu(lp->l1_to_l2_cs_tb));
+   l2_to_l1_ns = tb_to_ns(be64_to_cpu(lp->l2_to_l1_cs_tb));
+   l2_runtime_ns = tb_to_ns(be64_to_cpu(lp->l2_runtime_tb));
+   trace_kvmppc_vcpu_stats(vcpu, l1_to_l2_ns - local_paca->l1_to_l2_cs,
+   l2_to_l1_ns - local_paca->l2_to_l1_cs,
+   l2_runtim

Re: [PATCH v3] kprobe/ftrace: bail out if ftrace was killed

2024-05-06 Thread Christophe Leroy


Le 01/05/2024 à 18:29, Stephen Brennan a écrit :
> If an error happens in ftrace, ftrace_kill() will prevent disarming
> kprobes. Eventually, the ftrace_ops associated with the kprobes will be
> freed, yet the kprobes will still be active, and when triggered, they
> will use the freed memory, likely resulting in a page fault and panic.
> 
> This behavior can be reproduced quite easily, by creating a kprobe and
> then triggering a ftrace_kill(). For simplicity, we can simulate an
> ftrace error with a kernel module like [1]:
> 
> [1]: https://github.com/brenns10/kernel_stuff/tree/master/ftrace_killer
> 
>sudo perf probe --add commit_creds
>sudo perf trace -e probe:commit_creds
># In another terminal
>make
>sudo insmod ftrace_killer.ko  # calls ftrace_kill(), simulating bug
># Back to perf terminal
># ctrl-c
>sudo perf probe --del commit_creds
> 
> After a short period, a page fault and panic would occur as the kprobe
> continues to execute and uses the freed ftrace_ops. While ftrace_kill()
> is supposed to be used only in extreme circumstances, it is invoked in
> FTRACE_WARN_ON() and so there are many places where an unexpected bug
> could be triggered, yet the system may continue operating, possibly
> without the administrator noticing. If ftrace_kill() does not panic the
> system, then we should do everything we can to continue operating,
> rather than leave a ticking time bomb.
> 
> Signed-off-by: Stephen Brennan 
> ---
> Changes in v3:
>Don't expose ftrace_is_dead(). Create a "kprobe_ftrace_disabled"
>variable and check it directly in the kprobe handlers.

Isn't it safer to provide a function rather than a direct access to a 
variable ?

By the way, wouldn't it be more performant to use a static branch (jump 
label) ?

Christophe


Re: [PATCH v3 1/2] PCI: Add TLP Prefix reading into pcie_read_tlp_log()

2024-05-06 Thread Ilpo Järvinen
On Fri, 3 May 2024, Bjorn Helgaas wrote:

> On Fri, Apr 12, 2024 at 04:36:34PM +0300, Ilpo Järvinen wrote:
> > pcie_read_tlp_log() handles only 4 TLP Header Log DWORDs but TLP Prefix
> > Log (PCIe r6.1 secs 7.8.4.12 & 7.9.14.13) may also be present.
> > 
> > Generalize pcie_read_tlp_log() and struct pcie_tlp_log to handle also
> > TLP Prefix Log. The layout of relevant registers in AER and DPC
> > Capability is not identical because the offsets of TLP Header Log and
> > TLP Prefix Log vary so the callers must pass the offsets to
> > pcie_read_tlp_log().
> 
> I think the layouts of the Header Log and the TLP Prefix Log *are*
> identical, but they are at different offsets in the AER Capability vs
> the DPC Capability.  Lukas and I have both stumbled over this.

I'll try to reword it once again.

The way it's spec'ed, there is actually a small difference in sizes too 
(PCIe r6 7.9.14.13 says the DPC one can be < 4 DWs whereas the AER one is 
always 4 DWs regardless of the number of supported E-E Prefixes) so I'll 
just rewrite it so it doesn't focus just on the offset.

> Similar and more comments at:
> https://lore.kernel.org/r/20240322193011.GA701027@bhelgaas

I'm really sorry, I missed those comments and only focused on that ixgbe 
part.

> > Convert eetlp_prefix_path into integer called eetlp_prefix_max and
> > make is available also when CONFIG_PCI_PASID is not configured to
> > be able to determine the number of E-E Prefixes.
> 
> s/make is/make it/
> 
> I think this could be a separate patch.

Sure, I can make it its own patch.

> > --- a/include/linux/aer.h
> > +++ b/include/linux/aer.h
> > @@ -20,6 +20,7 @@ struct pci_dev;
> >  
> >  struct pcie_tlp_log {
> > u32 dw[4];
> > +   u32 prefix[4];
> >  };
> >  
> >  struct aer_capability_regs {
> > @@ -37,7 +38,9 @@ struct aer_capability_regs {
> > u16 uncor_err_source;
> >  };
> >  
> > -int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log 
> > *log);
> > +int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
> > + unsigned int tlp_len, struct pcie_tlp_log *log);
> > +unsigned int aer_tlp_log_len(struct pci_dev *dev);
> 
> I think it was a mistake to expose pcie_read_tlp_log() outside
> drivers/pci, and I don't think we should expose aer_tlp_log_len()
> either.

Ah, my intention was to remove the exposure but I only ended up removing 
the actual EXPORT and didn't realize I should have also moved the 
prototype into another header.

I'll also add a patch to remove the pcie_read_tlp_log() EXPORT, but I'm 
now wondering whether I should also move these function(s) into 
pcie/aer.c (or somewhere else that is only built if AER is enabled) since 
there won't be callers outside of AER/DPC?

> We might be stuck with exposing struct pcie_tlp_log since it looks
> like ras_event.h uses it.

Yes.

-- 
 i.

Re: WARNING: CPU: 1 PID: 1 at net/core/netpoll.c:370 netpoll_send_skb+0x1fc/0x20c at boot when netconsole is enabled (kernel v6.9-rc5, v6.8.7, sungem, PowerMac G4 DP)

2024-05-06 Thread Jakub Kicinski
On Sun, 5 May 2024 23:27:13 +0200 Erhard Furtner wrote:
> > On Sun, 28 Apr 2024 12:53:06 +0200 Erhard Furtner wrote:  
> > > With netconsole enabled I get this "WARNING: CPU: 1 PID: 1 at
> > > net/core/netpoll.c:370 netpoll_send_skb+0x1fc/0x20c" and "WARNING:
> > > CPU: 1 PID: 1 at kernel/locking/irqflag-debug.c:10
> > > warn_bogus_irq_restore+0x30/0x44" at boot on my PowerMac G4 DP.
> > > Happens more often than not (6-7 out of 10 times booting):
> > 
> > Could you try with LOCKDEP enabled?
> > I wonder if irqs_disabled() behaves differently than we expect.  
> 
> Ok, after a few tries I got a "BUG: spinlock wrong CPU on CPU#0, swapper/0/1" 
> LOCKDEP hit. But this does not happen every time when I get the netpoll_send 
> WARNING:

Oh, can you try deleting the gem_poll_controller() function?
Unhook it from ndo_poll_controller and remove it completely.


[PATCH 7/7] powerpc/platforms: Move files from 4xx to 44x

2024-05-06 Thread Michael Ellerman
From: Christophe Leroy 

Only 44x uses 4xx now, so only keep one directory.

Signed-off-by: Christophe Leroy 
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/platforms/44x/Makefile   |  6 -
 arch/powerpc/platforms/{4xx => 44x}/cpm.c |  0
 arch/powerpc/platforms/{4xx => 44x}/gpio.c|  0
 .../powerpc/platforms/{4xx => 44x}/hsta_msi.c |  0
 arch/powerpc/platforms/44x/machine_check.c| 15 
 arch/powerpc/platforms/{4xx => 44x}/pci.c |  0
 arch/powerpc/platforms/{4xx => 44x}/pci.h |  0
 arch/powerpc/platforms/{4xx => 44x}/soc.c |  0
 arch/powerpc/platforms/{4xx => 44x}/uic.c |  0
 arch/powerpc/platforms/4xx/Makefile   |  7 --
 arch/powerpc/platforms/4xx/machine_check.c| 23 ---
 arch/powerpc/platforms/Makefile   |  1 -
 12 files changed, 20 insertions(+), 32 deletions(-)
 rename arch/powerpc/platforms/{4xx => 44x}/cpm.c (100%)
 rename arch/powerpc/platforms/{4xx => 44x}/gpio.c (100%)
 rename arch/powerpc/platforms/{4xx => 44x}/hsta_msi.c (100%)
 rename arch/powerpc/platforms/{4xx => 44x}/pci.c (100%)
 rename arch/powerpc/platforms/{4xx => 44x}/pci.h (100%)
 rename arch/powerpc/platforms/{4xx => 44x}/soc.c (100%)
 rename arch/powerpc/platforms/{4xx => 44x}/uic.c (100%)
 delete mode 100644 arch/powerpc/platforms/4xx/Makefile
 delete mode 100644 arch/powerpc/platforms/4xx/machine_check.c

diff --git a/arch/powerpc/platforms/44x/Makefile 
b/arch/powerpc/platforms/44x/Makefile
index 5ba031f57652..ca7b1bb442d9 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-y  += misc_44x.o machine_check.o
+obj-y  += misc_44x.o machine_check.o uic.o soc.o
 ifneq ($(CONFIG_PPC4xx_CPM),y)
 obj-y  += idle.o
 endif
@@ -12,3 +12,7 @@ obj-$(CONFIG_CANYONLANDS)+= canyonlands.o
 obj-$(CONFIG_CURRITUCK)+= ppc476.o
 obj-$(CONFIG_AKEBONO)  += ppc476.o
 obj-$(CONFIG_FSP2) += fsp2.o
+obj-$(CONFIG_PCI)  += pci.o
+obj-$(CONFIG_PPC4xx_HSTA_MSI)  += hsta_msi.o
+obj-$(CONFIG_PPC4xx_CPM)   += cpm.o
+obj-$(CONFIG_PPC4xx_GPIO)  += gpio.o
diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/44x/cpm.c
similarity index 100%
rename from arch/powerpc/platforms/4xx/cpm.c
rename to arch/powerpc/platforms/44x/cpm.c
diff --git a/arch/powerpc/platforms/4xx/gpio.c 
b/arch/powerpc/platforms/44x/gpio.c
similarity index 100%
rename from arch/powerpc/platforms/4xx/gpio.c
rename to arch/powerpc/platforms/44x/gpio.c
diff --git a/arch/powerpc/platforms/4xx/hsta_msi.c 
b/arch/powerpc/platforms/44x/hsta_msi.c
similarity index 100%
rename from arch/powerpc/platforms/4xx/hsta_msi.c
rename to arch/powerpc/platforms/44x/hsta_msi.c
diff --git a/arch/powerpc/platforms/44x/machine_check.c 
b/arch/powerpc/platforms/44x/machine_check.c
index 5d19daacd78a..85ff33a8d9b6 100644
--- a/arch/powerpc/platforms/44x/machine_check.c
+++ b/arch/powerpc/platforms/44x/machine_check.c
@@ -9,6 +9,21 @@
 #include 
 #include 
 
+int machine_check_4xx(struct pt_regs *regs)
+{
+   unsigned long reason = regs->esr;
+
+   if (reason & ESR_IMCP) {
+   printk("Instruction");
+   mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+   } else
+   printk("Data");
+
+   printk(" machine check in kernel mode.\n");
+
+   return 0;
+}
+
 int machine_check_440A(struct pt_regs *regs)
 {
unsigned long reason = regs->esr;
diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/44x/pci.c
similarity index 100%
rename from arch/powerpc/platforms/4xx/pci.c
rename to arch/powerpc/platforms/44x/pci.c
diff --git a/arch/powerpc/platforms/4xx/pci.h b/arch/powerpc/platforms/44x/pci.h
similarity index 100%
rename from arch/powerpc/platforms/4xx/pci.h
rename to arch/powerpc/platforms/44x/pci.h
diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/44x/soc.c
similarity index 100%
rename from arch/powerpc/platforms/4xx/soc.c
rename to arch/powerpc/platforms/44x/soc.c
diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/44x/uic.c
similarity index 100%
rename from arch/powerpc/platforms/4xx/uic.c
rename to arch/powerpc/platforms/44x/uic.c
diff --git a/arch/powerpc/platforms/4xx/Makefile 
b/arch/powerpc/platforms/4xx/Makefile
deleted file mode 100644
index 7f57c35f8dec..
--- a/arch/powerpc/platforms/4xx/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-y  += uic.o machine_check.o
-obj-$(CONFIG_44x)  += soc.o
-obj-$(CONFIG_PCI)  += pci.o
-obj-$(CONFIG_PPC4xx_HSTA_MSI)  += hsta_msi.o
-obj-$(CONFIG_PPC4xx_CPM)   += cpm.o
-obj-$(CONFIG_PPC4xx_GPIO)  += gpio.o
diff --git a/arch/powerpc/platforms/4xx/machine_check.c 
b/arch/powerpc/platforms/4xx/machine_check.c
deleted file mode 100644
index a905da1d6f41..
--- a/arch/powerpc/platforms/4xx/machine_check.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// 

[PATCH 6/7] powerpc: Replace CONFIG_4xx with CONFIG_44x

2024-05-06 Thread Michael Ellerman
Replace 4xx usage with 44x, and replace 4xx_SOC with 44x.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/Kconfig   | 5 +
 arch/powerpc/include/asm/cacheflush.h  | 2 +-
 arch/powerpc/include/asm/ppc_asm.h | 2 +-
 arch/powerpc/kernel/entry_32.S | 6 +++---
 arch/powerpc/kernel/process.c  | 2 +-
 arch/powerpc/mm/fault.c| 4 ++--
 arch/powerpc/mm/ptdump/Makefile| 2 +-
 arch/powerpc/platforms/4xx/Makefile| 2 +-
 arch/powerpc/platforms/Kconfig.cputype | 8 +---
 arch/powerpc/sysdev/Kconfig| 4 ++--
 10 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9a7d2b218516..2b6fa87464a5 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -488,7 +488,7 @@ source "kernel/Kconfig.hz"
 
 config MATH_EMULATION
bool "Math emulation"
-   depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE || PPC_MICROWATT
+   depends on 44x || PPC_8xx || PPC_MPC832x || BOOKE || PPC_MICROWATT
select PPC_FPU_REGS
help
  Some PowerPC chips designed for embedded applications do not have
@@ -1102,9 +1102,6 @@ config PPC4xx_CPM
  It also enables support for two different idle states (idle-wait
  and idle-doze).
 
-config 4xx_SOC
-   bool
-
 config FSL_LBC
bool "Freescale Local Bus support"
help
diff --git a/arch/powerpc/include/asm/cacheflush.h 
b/arch/powerpc/include/asm/cacheflush.h
index ef7d2de33b89..f2656774aaa9 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -121,7 +121,7 @@ static inline void invalidate_dcache_range(unsigned long 
start,
mb();   /* sync */
 }
 
-#ifdef CONFIG_4xx
+#ifdef CONFIG_44x
 static inline void flush_instruction_cache(void)
 {
iccci((void *)KERNELBASE);
diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index 1d1018c1e482..02897f4b0dbf 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -482,7 +482,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, 
CPU_FTR_CELL_TB_BUG, 96)
  * and they must be used.
  */
 
-#if !defined(CONFIG_4xx) && !defined(CONFIG_PPC_8xx)
+#if !defined(CONFIG_44x) && !defined(CONFIG_PPC_8xx)
 #define tlbia  \
li  r4,1024;\
mtctr   r4; \
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 1522164b10e4..98ad926c056f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -211,7 +211,7 @@ start_kernel_thread:
 
.globl  fast_exception_return
 fast_exception_return:
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+#if !(defined(CONFIG_44x) || defined(CONFIG_BOOKE))
andi.   r10,r9,MSR_RI   /* check for recoverable interrupt */
beq 3f  /* if not, we've got problems */
 #endif
@@ -365,7 +365,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
rfi
 _ASM_NOKPROBE_SYMBOL(interrupt_return)
 
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
 
 /*
  * Returning from a critical interrupt in user mode doesn't need
@@ -469,4 +469,4 @@ ret_from_mcheck_exc:
RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI)
 _ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc)
 #endif /* CONFIG_BOOKE */
-#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
+#endif /* !(CONFIG_44x || CONFIG_BOOKE) */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 196cfa41ad6e..cddb4c099bbd 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1566,7 +1566,7 @@ static void __show_regs(struct pt_regs *regs)
if (trap == INTERRUPT_MACHINE_CHECK ||
trap == INTERRUPT_DATA_STORAGE ||
trap == INTERRUPT_ALIGNMENT) {
-   if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE))
+   if (IS_ENABLED(CONFIG_44x) || IS_ENABLED(CONFIG_BOOKE))
pr_cont("DEAR: "REG" ESR: "REG" ", regs->dear, 
regs->esr);
else
pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, 
regs->dsisr);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 53335ae21a40..9af44ddf4b53 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -361,13 +361,13 @@ static void sanity_check_fault(bool is_write, bool 
is_user,
  * Define the correct "is_write" bit in error_code based
  * on the processor family
  */
-#if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+#if (defined(CONFIG_44x) || defined(CONFIG_BOOKE))
 #define page_fault_is_write(__err) ((__err) & ESR_DST)
 #else
 #define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE)
 #endif
 
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
+#if defined(CONFIG_44x) || defined(CONFIG_BOOKE)
 #define

[PATCH 5/7] powerpc/4xx: Remove CONFIG_BOOKE_OR_40x

2024-05-06 Thread Michael Ellerman
Now that 40x is gone, replace CONFIG_BOOKE_OR_40x by CONFIG_BOOKE.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/hw_irq.h  | 8 
 arch/powerpc/include/asm/irq.h | 2 +-
 arch/powerpc/include/asm/kup.h | 2 +-
 arch/powerpc/include/asm/processor.h   | 2 +-
 arch/powerpc/include/asm/ptrace.h  | 2 +-
 arch/powerpc/include/asm/reg.h | 2 +-
 arch/powerpc/kernel/asm-offsets.c  | 2 +-
 arch/powerpc/kernel/entry_32.S | 2 +-
 arch/powerpc/kernel/epapr_hcalls.S | 2 +-
 arch/powerpc/kernel/irq.c  | 2 +-
 arch/powerpc/kernel/kgdb.c | 4 ++--
 arch/powerpc/kernel/process.c  | 2 +-
 arch/powerpc/kernel/setup.h| 2 +-
 arch/powerpc/kernel/setup_32.c | 2 +-
 arch/powerpc/kernel/time.c | 2 +-
 arch/powerpc/mm/mmu_context.c  | 2 +-
 arch/powerpc/mm/nohash/mmu_context.c   | 2 +-
 arch/powerpc/platforms/Kconfig.cputype | 5 -
 18 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index 317659fdeacf..569ac1165b06 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -63,7 +63,7 @@
 
 static inline void __hard_irq_enable(void)
 {
-   if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
+   if (IS_ENABLED(CONFIG_BOOKE))
wrtee(MSR_EE);
else if (IS_ENABLED(CONFIG_PPC_8xx))
wrtspr(SPRN_EIE);
@@ -75,7 +75,7 @@ static inline void __hard_irq_enable(void)
 
 static inline void __hard_irq_disable(void)
 {
-   if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
+   if (IS_ENABLED(CONFIG_BOOKE))
wrtee(0);
else if (IS_ENABLED(CONFIG_PPC_8xx))
wrtspr(SPRN_EID);
@@ -87,7 +87,7 @@ static inline void __hard_irq_disable(void)
 
 static inline void __hard_EE_RI_disable(void)
 {
-   if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
+   if (IS_ENABLED(CONFIG_BOOKE))
wrtee(0);
else if (IS_ENABLED(CONFIG_PPC_8xx))
wrtspr(SPRN_NRI);
@@ -99,7 +99,7 @@ static inline void __hard_EE_RI_disable(void)
 
 static inline void __hard_RI_enable(void)
 {
-   if (IS_ENABLED(CONFIG_BOOKE_OR_40x))
+   if (IS_ENABLED(CONFIG_BOOKE))
return;
 
if (IS_ENABLED(CONFIG_PPC_8xx))
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index ba1a5974e714..aa3751960ffd 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -33,7 +33,7 @@ extern int distribute_irqs;
 
 struct pt_regs;
 
-#ifdef CONFIG_BOOKE_OR_40x
+#ifdef CONFIG_BOOKE
 /*
  * Per-cpu stacks for handling critical, debug and machine check
  * level interrupts.
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index ad7e8c5aec3f..2bb03d941e3e 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -20,7 +20,7 @@ static __always_inline bool kuap_is_disabled(void);
 #include 
 #endif
 
-#ifdef CONFIG_BOOKE_OR_40x
+#ifdef CONFIG_BOOKE
 #include 
 #endif
 
diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index b2c51d337e60..667444da0ba4 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -159,7 +159,7 @@ struct thread_struct {
unsigned long   sr0;
 #endif
 #endif /* CONFIG_PPC32 */
-#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP)
+#if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP)
unsigned long   pid;/* value written in PID reg. at interrupt exit 
*/
 #endif
/* Debug Registers */
diff --git a/arch/powerpc/include/asm/ptrace.h 
b/arch/powerpc/include/asm/ptrace.h
index ea8f91fbc62f..7b9350756875 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -310,7 +310,7 @@ static inline void regs_set_return_value(struct pt_regs 
*regs, unsigned long rc)
 
 static inline bool cpu_has_msr_ri(void)
 {
-   return !IS_ENABLED(CONFIG_BOOKE_OR_40x);
+   return !IS_ENABLED(CONFIG_BOOKE);
 }
 
 static inline bool regs_is_unrecoverable(struct pt_regs *regs)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 85b98f625053..b2cbd2bc3adf 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -18,7 +18,7 @@
 #include 
 
 /* Pickup Book E specific registers. */
-#ifdef CONFIG_BOOKE_OR_40x
+#ifdef CONFIG_BOOKE
 #include 
 #endif
 
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index f029755f9e69..23733282de4d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -54,7 +54,7 @@
 #endif
 
 #ifdef CONFIG_PPC32
-#ifdef CONFIG_BOOKE_OR_40x
+#ifdef CONFIG_BOOKE
 #include "head_booke.h"
 #endif
 #endif
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 070eab8b6a28..1522164b10e4 100644
--- a/arch/powerpc/kernel/entry_32.S
+++

[PATCH 3/7] powerpc: Remove 40x from Kconfig and defconfig

2024-05-06 Thread Michael Ellerman
Remove 40x from Kconfig, making the code unreachable.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/Kconfig   | 12 ++--
 arch/powerpc/Kconfig.debug | 13 -
 arch/powerpc/Makefile  |  5 -
 arch/powerpc/configs/40x.config|  2 --
 arch/powerpc/platforms/Kconfig.cputype | 21 -
 5 files changed, 10 insertions(+), 43 deletions(-)
 delete mode 100644 arch/powerpc/configs/40x.config

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1c4be3373686..9a7d2b218516 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -148,7 +148,7 @@ config PPC
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SCALED_CPUTIME  if VIRT_CPU_ACCOUNTING_NATIVE 
&& PPC_BOOK3S_64
select ARCH_HAS_SET_MEMORY
-   select ARCH_HAS_STRICT_KERNEL_RWX   if (PPC_BOOK3S || PPC_8xx || 
40x) && !HIBERNATION
+   select ARCH_HAS_STRICT_KERNEL_RWX   if (PPC_BOOK3S || PPC_8xx) && 
!HIBERNATION
select ARCH_HAS_STRICT_KERNEL_RWX   if PPC_85xx && !HIBERNATION && 
!RANDOMIZE_BASE
select ARCH_HAS_STRICT_MODULE_RWX   if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_SYSCALL_WRAPPER if !SPU_BASE && !COMPAT
@@ -165,7 +165,7 @@ config PPC
select ARCH_SPLIT_ARG64 if PPC32
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
-   select ARCH_SUPPORTS_DEBUG_PAGEALLOCif PPC_BOOK3S || PPC_8xx || 40x
+   select ARCH_SUPPORTS_DEBUG_PAGEALLOCif PPC_BOOK3S || PPC_8xx
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
@@ -387,7 +387,7 @@ config ARCH_SUSPEND_POSSIBLE
def_bool y
depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \
   (PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \
-  || 44x || 40x
+  || 44x
 
 config ARCH_SUSPEND_NONZERO_CPU
def_bool y
@@ -441,7 +441,7 @@ config ARCH_SUPPORTS_UPROBES
 
 config PPC_ADV_DEBUG_REGS
bool
-   depends on 40x || BOOKE
+   depends on BOOKE
default y
 
 config PPC_ADV_DEBUG_IACS
@@ -1071,7 +1071,7 @@ config GENERIC_ISA_DMA
 config PPC_INDIRECT_PCI
bool
depends on PCI
-   default y if 40x || 44x
+   default y if 44x
 
 config SBUS
bool
@@ -1096,7 +1096,7 @@ config FSL_PMC
 config PPC4xx_CPM
bool
default y
-   depends on SUSPEND && (44x || 40x)
+   depends on SUSPEND && 44x
help
  PPC4xx Clock Power Management (CPM) support (suspend/resume).
  It also enables support for two different idle states (idle-wait
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 8c80b154e814..3799ceceb04a 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -244,14 +244,6 @@ config PPC_EARLY_DEBUG_44x
  inbuilt serial port.  If you enable this, ensure you set
  PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board.
 
-config PPC_EARLY_DEBUG_40x
-   bool "Early serial debugging for IBM/AMCC 40x CPUs"
-   depends on 40x
-   help
- Select this to enable early debugging for IBM 40x chips via the
- inbuilt serial port. This works on chips with a 16550 compatible
- UART.
-
 config PPC_EARLY_DEBUG_CPM
bool "Early serial debugging for Freescale CPM-based serial ports"
depends on SERIAL_CPM=y
@@ -356,11 +348,6 @@ config PPC_EARLY_DEBUG_44x_PHYSHIGH
depends on PPC_EARLY_DEBUG_44x
default "0x1"
 
-config PPC_EARLY_DEBUG_40x_PHYSADDR
-   hex "Early debug UART physical address"
-   depends on PPC_EARLY_DEBUG_40x
-   default "0xef600300"
-
 config PPC_EARLY_DEBUG_CPM_ADDR
hex "CPM UART early debug transmit descriptor address"
depends on PPC_EARLY_DEBUG_CPM
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 65261cbe5bfd..a59563e1550b 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -302,11 +302,6 @@ ppc32_allmodconfig:
$(Q)$(MAKE) 
KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/book3s_32.config \
-f $(srctree)/Makefile allmodconfig
 
-generated_configs += ppc40x_allmodconfig
-ppc40x_allmodconfig:
-   $(Q)$(MAKE) 
KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/40x.config \
-   -f $(srctree)/Makefile allmodconfig
-
 generated_configs += ppc44x_allmodconfig
 ppc44x_allmodconfig:
$(Q)$(MAKE) 
KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/44x.config \
diff --git a/arch/powerpc/configs/40x.config b/arch/powerpc/configs/40x.config
deleted file mode 100644
index 82a9d58ddb81..
--- a/arch/powerpc/configs/40x.config
+++ /dev/null
@@ -1,2 +0,0 @@
-CONFIG_PPC64=n
-CONFIG_40x=y
diff --git a/arch/powerpc/platforms/Kconfig.cputype 
b/arch/powerpc/platforms/Kconfig.cputype
index b2d8c0da2ad9..2b686ee2dd2b 100644

[PATCH 2/7] powerpc/boot: Remove all 40x platforms from boot

2024-05-06 Thread Michael Ellerman
From: Christophe Leroy 

Remove 40x platforms from the boot directory.

Signed-off-by: Christophe Leroy 
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/boot/4xx.c | 266 --
 arch/powerpc/boot/4xx.h |   4 -
 arch/powerpc/boot/Makefile  |  11 -
 arch/powerpc/boot/cuboot-acadia.c   | 171 
 arch/powerpc/boot/cuboot-hotfoot.c  | 139 --
 arch/powerpc/boot/cuboot-kilauea.c  |  46 
 arch/powerpc/boot/dcr.h |  11 -
 arch/powerpc/boot/dts/acadia.dts| 224 ---
 arch/powerpc/boot/dts/hotfoot.dts   | 296 
 arch/powerpc/boot/dts/kilauea.dts   | 407 
 arch/powerpc/boot/dts/obs600.dts| 314 -
 arch/powerpc/boot/ppcboot-hotfoot.h | 119 
 arch/powerpc/boot/ppcboot.h |   2 +-
 13 files changed, 1 insertion(+), 2009 deletions(-)
 delete mode 100644 arch/powerpc/boot/cuboot-acadia.c
 delete mode 100644 arch/powerpc/boot/cuboot-hotfoot.c
 delete mode 100644 arch/powerpc/boot/cuboot-kilauea.c
 delete mode 100644 arch/powerpc/boot/dts/acadia.dts
 delete mode 100644 arch/powerpc/boot/dts/hotfoot.dts
 delete mode 100644 arch/powerpc/boot/dts/kilauea.dts
 delete mode 100644 arch/powerpc/boot/dts/obs600.dts
 delete mode 100644 arch/powerpc/boot/ppcboot-hotfoot.h

diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c
index 00c4d843a023..682ca3827892 100644
--- a/arch/powerpc/boot/4xx.c
+++ b/arch/powerpc/boot/4xx.c
@@ -253,7 +253,6 @@ void ibm4xx_denali_fixup_memsize(void)
dt_fixup_memory(0, memsize);
 }
 
-#define SPRN_DBCR0_40X 0x3F2
 #define SPRN_DBCR0_44X 0x134
 #define DBCR0_RST_SYSTEM 0x3000
 
@@ -270,18 +269,6 @@ void ibm44x_dbcr_reset(void)
 
 }
 
-void ibm40x_dbcr_reset(void)
-{
-   unsigned long tmp;
-
-   asm volatile (
-   "mfspr  %0,%1\n"
-   "oris   %0,%0,%2@h\n"
-   "mtspr  %1,%0"
-   : "=&r"(tmp) : "i"(SPRN_DBCR0_40X), "i"(DBCR0_RST_SYSTEM)
-   );
-}
-
 #define EMAC_RESET 0x2000
 void ibm4xx_quiesce_eth(u32 *emac0, u32 *emac1)
 {
@@ -544,256 +531,3 @@ void ibm440spe_fixup_clocks(unsigned int sys_clk,
eplike_fixup_uart_clk(1, "/plb/opb/serial@f300", ser_clk, plb_clk);
eplike_fixup_uart_clk(2, "/plb/opb/serial@f600", ser_clk, plb_clk);
 }
-
-void ibm405gp_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk)
-{
-   u32 pllmr = mfdcr(DCRN_CPC0_PLLMR);
-   u32 cpc0_cr0 = mfdcr(DCRN_405_CPC0_CR0);
-   u32 cpc0_cr1 = mfdcr(DCRN_405_CPC0_CR1);
-   u32 psr = mfdcr(DCRN_405_CPC0_PSR);
-   u32 cpu, plb, opb, ebc, tb, uart0, uart1, m;
-   u32 fwdv, fwdvb, fbdv, cbdv, opdv, epdv, ppdv, udiv;
-
-   fwdv = (8 - ((pllmr & 0xe000) >> 29));
-   fbdv = (pllmr & 0x1e00) >> 25;
-   if (fbdv == 0)
-   fbdv = 16;
-   cbdv = ((pllmr & 0x0006) >> 17) + 1; /* CPU:PLB */
-   opdv = ((pllmr & 0x00018000) >> 15) + 1; /* PLB:OPB */
-   ppdv = ((pllmr & 0x6000) >> 13) + 1; /* PLB:PCI */
-   epdv = ((pllmr & 0x1800) >> 11) + 2; /* PLB:EBC */
-   udiv = ((cpc0_cr0 & 0x3e) >> 1) + 1;
-
-   /* check for 405GPr */
-   if ((mfpvr() & 0xfff0) == (0x50910951 & 0xfff0)) {
-   fwdvb = 8 - (pllmr & 0x0007);
-   if (!(psr & 0x1000)) /* PCI async mode enable == 0 */
-   if (psr & 0x0020) /* New mode enable */
-   m = fwdvb * 2 * ppdv;
-   else
-   m = fwdvb * cbdv * ppdv;
-   else if (psr & 0x0020) /* New mode enable */
-   if (psr & 0x0800) /* PerClk synch mode */
-   m = fwdvb * 2 * epdv;
-   else
-   m = fbdv * fwdv;
-   else if (epdv == fbdv)
-   m = fbdv * cbdv * epdv;
-   else
-   m = fbdv * fwdvb * cbdv;
-
-   cpu = sys_clk * m / fwdv;
-   plb = sys_clk * m / (fwdvb * cbdv);
-   } else {
-   m = fwdv * fbdv * cbdv;
-   cpu = sys_clk * m / fwdv;
-   plb = cpu / cbdv;
-   }
-   opb = plb / opdv;
-   ebc = plb / epdv;
-
-   if (cpc0_cr0 & 0x80)
-   /* uart0 uses the external clock */
-   uart0 = ser_clk;
-   else
-   uart0 = cpu / udiv;
-
-   if (cpc0_cr0 & 0x40)
-   /* uart1 uses the external clock */
-   uart1 = ser_clk;
-   else
-   uart1 = cpu / udiv;
-
-   /* setup the timebase clock to tick at the cpu frequency */
-   cpc0_cr1 = cpc0_cr1 & ~0x0080;
-   mtdcr(DCRN_405_CPC0_CR1, cpc0_cr1);
-   tb = cpu;
-
-   dt_fixup_cpu_clocks(cpu, tb, 0);
-   dt_fixup_clock("/plb", plb);
-   dt_fixup_clock("/plb/opb", opb);
-   dt_fixup_clock("/plb/ebc", ebc);

[PATCH 1/7] powerpc/40x: Remove 40x platforms.

2024-05-06 Thread Michael Ellerman
From: Christophe Leroy 

40x platforms have been orphaned for many years.

Remove them.

Signed-off-by: Christophe Leroy 
Signed-off-by: Michael Ellerman 
---
 MAINTAINERS |  1 -
 arch/powerpc/configs/40x/acadia_defconfig   | 61 
 arch/powerpc/configs/40x/kilauea_defconfig  | 69 --
 arch/powerpc/configs/40x/klondike_defconfig | 43 
 arch/powerpc/configs/40x/makalu_defconfig   | 59 
 arch/powerpc/configs/40x/obs600_defconfig   | 69 --
 arch/powerpc/configs/40x/walnut_defconfig   | 55 ---
 arch/powerpc/configs/ppc40x_defconfig   | 74 ---
 arch/powerpc/platforms/40x/Kconfig  | 78 -
 arch/powerpc/platforms/40x/Makefile |  2 -
 arch/powerpc/platforms/40x/ppc40x_simple.c  | 74 ---
 arch/powerpc/platforms/Kconfig  |  1 -
 arch/powerpc/platforms/Makefile |  1 -
 13 files changed, 587 deletions(-)
 delete mode 100644 arch/powerpc/configs/40x/acadia_defconfig
 delete mode 100644 arch/powerpc/configs/40x/kilauea_defconfig
 delete mode 100644 arch/powerpc/configs/40x/klondike_defconfig
 delete mode 100644 arch/powerpc/configs/40x/makalu_defconfig
 delete mode 100644 arch/powerpc/configs/40x/obs600_defconfig
 delete mode 100644 arch/powerpc/configs/40x/walnut_defconfig
 delete mode 100644 arch/powerpc/configs/ppc40x_defconfig
 delete mode 100644 arch/powerpc/platforms/40x/Kconfig
 delete mode 100644 arch/powerpc/platforms/40x/Makefile
 delete mode 100644 arch/powerpc/platforms/40x/ppc40x_simple.c

diff --git a/MAINTAINERS b/MAINTAINERS
index f096c9fff5b3..09857ade8e89 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12520,7 +12520,6 @@ F:  arch/powerpc/platforms/52xx/
 LINUX FOR POWERPC EMBEDDED PPC4XX
 L: linuxppc-dev@lists.ozlabs.org
 S: Orphan
-F: arch/powerpc/platforms/40x/
 F: arch/powerpc/platforms/44x/
 
 LINUX FOR POWERPC EMBEDDED PPC85XX
diff --git a/arch/powerpc/configs/40x/acadia_defconfig 
b/arch/powerpc/configs/40x/acadia_defconfig
deleted file mode 100644
index 25eed86ec528..
--- a/arch/powerpc/configs/40x/acadia_defconfig
+++ /dev/null
@@ -1,61 +0,0 @@
-CONFIG_40x=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_ACADIA=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_IPV6 is not set
-CONFIG_CONNECTOR=y
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=m
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=35000
-CONFIG_NETDEVICES=y
-CONFIG_IBM_EMAC=y
-CONFIG_IBM_EMAC_RXB=256
-CONFIG_IBM_EMAC_TXB=256
-CONFIG_IBM_EMAC_DEBUG=y
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-CONFIG_THERMAL=y
-# CONFIG_USB_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
-CONFIG_DEBUG_FS=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_PCBC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_DES=y
diff --git a/arch/powerpc/configs/40x/kilauea_defconfig 
b/arch/powerpc/configs/40x/kilauea_defconfig
deleted file mode 100644
index 3549c9e950e8..
--- a/arch/powerpc/configs/40x/kilauea_defconfig
+++ /dev/null
@@ -1,69 +0,0 @@
-CONFIG_40x=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_KILAUEA=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_IPV6 is not set
-CONFIG_CONNECTOR=y
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_RAW_NAND=y
-CONFIG_MTD_NAND_NDFC=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=35000
-CONFIG_NETDEVICES=y
-CONFIG_IBM_EMAC=y
-CONFIG_IBM_EMAC_RXB=256
-CONFIG_IBM_EMAC_TXB=256
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_IBM_IIC=y
-CONFIG_SENSORS_LM75=y
-CONFIG_THERMAL=y
-# CONFI

[PATCH 0/7] Remove 40x

2024-05-06 Thread Michael Ellerman
The 40x platforms & CPUs are old and have been unmaintained for years, and as
far as we can tell have no users.

Note 44x and 476 are not affected.

Christophe first proposed removing 40x in 2020:
  
https://lore.kernel.org/linuxppc-dev/40899eb1b8f10da3706acd06c3d46d2418c8886e.1585635837.git.christophe.le...@c-s.fr/

And on a few subsequent occasions.

The proposed removal was covered on Phoronix:
  https://www.phoronix.com/news/PowerPC-40x-400-Linux-Removal

OpenWRT dropped support in 2019.

So unless anyone steps up to maintain it, remove 40x as unused. RIP.

cheers


More context: https://github.com/linuxppc/issues/issues/349

Christophe Leroy (4):
  powerpc/40x: Remove 40x platforms.
  powerpc/boot: Remove all 40x platforms from boot
  powerpc: Remove core support for 40x
  powerpc/platforms: Move files from 4xx to 44x

Michael Ellerman (3):
  powerpc: Remove 40x from Kconfig and defconfig
  powerpc/4xx: Remove CONFIG_BOOKE_OR_40x
  powerpc: Replace CONFIG_4xx with CONFIG_44x

 MAINTAINERS   |   1 -
 arch/powerpc/Kconfig  |  17 +-
 arch/powerpc/Kconfig.debug|  13 -
 arch/powerpc/Makefile |   5 -
 arch/powerpc/boot/4xx.c   | 266 ---
 arch/powerpc/boot/4xx.h   |   4 -
 arch/powerpc/boot/Makefile|  11 -
 arch/powerpc/boot/cuboot-acadia.c | 171 -
 arch/powerpc/boot/cuboot-hotfoot.c| 139 
 arch/powerpc/boot/cuboot-kilauea.c|  46 --
 arch/powerpc/boot/dcr.h   |  11 -
 arch/powerpc/boot/dts/acadia.dts  | 224 --
 arch/powerpc/boot/dts/hotfoot.dts | 296 ---
 arch/powerpc/boot/dts/kilauea.dts | 407 --
 arch/powerpc/boot/dts/obs600.dts  | 314 
 arch/powerpc/boot/ppcboot-hotfoot.h   | 119 ---
 arch/powerpc/boot/ppcboot.h   |   2 +-
 arch/powerpc/configs/40x.config   |   2 -
 arch/powerpc/configs/40x/acadia_defconfig |  61 --
 arch/powerpc/configs/40x/kilauea_defconfig|  69 --
 arch/powerpc/configs/40x/klondike_defconfig   |  43 --
 arch/powerpc/configs/40x/makalu_defconfig |  59 --
 arch/powerpc/configs/40x/obs600_defconfig |  69 --
 arch/powerpc/configs/40x/walnut_defconfig |  55 --
 arch/powerpc/configs/ppc40x_defconfig |  74 --
 arch/powerpc/include/asm/cacheflush.h |   2 +-
 arch/powerpc/include/asm/cputable.h   |   7 -
 arch/powerpc/include/asm/hw_irq.h |   8 +-
 arch/powerpc/include/asm/irq.h|   2 +-
 arch/powerpc/include/asm/kup.h|   2 +-
 arch/powerpc/include/asm/mmu.h|   7 -
 arch/powerpc/include/asm/nohash/32/mmu-40x.h  |  68 --
 arch/powerpc/include/asm/nohash/32/pgtable.h  |   4 +-
 arch/powerpc/include/asm/nohash/32/pte-40x.h  |  73 --
 arch/powerpc/include/asm/nohash/mmu.h |   5 +-
 arch/powerpc/include/asm/ppc_asm.h|   2 +-
 arch/powerpc/include/asm/processor.h  |   2 +-
 arch/powerpc/include/asm/ptrace.h |   2 +-
 arch/powerpc/include/asm/reg.h|  27 +-
 arch/powerpc/include/asm/reg_booke.h  | 113 +--
 arch/powerpc/include/asm/time.h   |   7 +-
 arch/powerpc/include/asm/udbg.h   |   1 -
 arch/powerpc/kernel/Makefile  |   1 -
 arch/powerpc/kernel/asm-offsets.c |   2 +-
 arch/powerpc/kernel/cpu_specs.h   |   4 -
 arch/powerpc/kernel/cpu_specs_40x.h   | 280 ---
 arch/powerpc/kernel/entry_32.S|  48 +-
 arch/powerpc/kernel/epapr_hcalls.S|   2 +-
 arch/powerpc/kernel/head_32.h |  12 +-
 arch/powerpc/kernel/head_40x.S| 721 --
 arch/powerpc/kernel/head_booke.h  |   3 +-
 arch/powerpc/kernel/irq.c |   2 +-
 arch/powerpc/kernel/kgdb.c|   4 +-
 arch/powerpc/kernel/misc_32.S |  40 -
 arch/powerpc/kernel/process.c |   4 +-
 arch/powerpc/kernel/setup.h   |   2 +-
 arch/powerpc/kernel/setup_32.c|   2 +-
 arch/powerpc/kernel/time.c|   2 +-
 arch/powerpc/kernel/traps.c   |   2 +-
 arch/powerpc/kernel/udbg.c|   3 -
 arch/powerpc/kernel/udbg_16550.c  |  23 -
 arch/powerpc/mm/fault.c   |   4 +-
 arch/powerpc/mm/mmu_context.c |   2 +-
 arch/powerpc/mm/mmu_decl.h|   8 +-
 arch/powerpc/mm/nohash/40x.c  | 161 
 arch/powerpc/mm/nohash/Makefile   |   1 -
 arch/powerpc/mm/nohash/kup.c  |   2 -
 arch/powerpc/mm/nohash/mmu_context.c  |   5 +-
 arch/powerpc/mm/nohash/tlb_low.S  |  27 +-
 arch/powerpc/mm/ptdump/Makefile   |   2 +-
 arch/powerpc/platforms/40x/Kconfig

[PATCH V2 4/9] tools/perf: Add support to capture and parse raw instruction in objdump

2024-05-06 Thread Athira Rajeev
Add support to capture and parse raw instruction in objdump.
Currently, the perf tool infrastructure uses the "--no-show-raw-insn" option
with "objdump" while disassembling. An example from powerpc with this option
for an instruction address is:

Snippet from:
objdump  --start-address= --stop-address=  -d 
--no-show-raw-insn -C 

c10224b4:   lwz r10,0(r9)

This line "lwz r10,0(r9)" is parsed to extract instruction name,
registers names and offset. Also to find whether there is a memory
reference in the operands, "memory_ref_char" field of objdump is used.
For x86, "(" is used as memory_ref_char to tackle instructions of the
form "mov  (%rax), %rcx".

In case of powerpc, instructions using "(" are not the only memory
instructions. For example, the above instruction can also be of extended form (X
form) "lwzx r10,0,r19". In order to easily identify the instruction category
and extract the source/target registers, this patch adds support to use the raw
instruction. With the raw instruction, macros are added to extract opcode
and register fields.

"struct ins_operands" and "struct ins" is updated to carry opcode and
raw instruction binary code (raw_insn). Function "disasm_line__parse"
is updated to fill the raw instruction hex value and opcode in newly
added fields. There is no changes in existing code paths, which parses
the disassembled code. The architecture using the instruction name and
present approach is not altered. Since this approach targets powerpc,
the macro implementation is added for powerpc as of now.

Example:
representation using --show-raw-insn in objdump gives result:

38 01 81 e8 ld  r4,312(r1)

Here "38 01 81 e8" is the raw instruction representation. In powerpc,
this translates to instruction form: "ld RT,DS(RA)" and binary code
as:
 ____________________________________
| 58 |  RT  |  RA  |      DS     |   |
 ------------------------------------
0    6     11     16            30  31

Function "disasm_line__parse" is updated to capture:

line:38 01 81 e8 ld  r4,312(r1)
opcode and raw instruction "38 01 81 e8"
Raw instruction is used later to extract the reg/offset fields.

Signed-off-by: Athira Rajeev 
---
 tools/include/linux/string.h  |  2 +
 tools/lib/string.c| 13 +++
 tools/perf/arch/powerpc/util/dwarf-regs.c | 19 ++
 tools/perf/util/disasm.c  | 46 +++
 tools/perf/util/disasm.h  |  6 +++
 tools/perf/util/include/dwarf-regs.h  |  9 +
 6 files changed, 88 insertions(+), 7 deletions(-)

diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
index db5c99318c79..0acb1fc14e19 100644
--- a/tools/include/linux/string.h
+++ b/tools/include/linux/string.h
@@ -46,5 +46,7 @@ extern char * __must_check skip_spaces(const char *);
 
 extern char *strim(char *);
 
+extern void remove_spaces(char *s);
+
 extern void *memchr_inv(const void *start, int c, size_t bytes);
 #endif /* _TOOLS_LINUX_STRING_H_ */
diff --git a/tools/lib/string.c b/tools/lib/string.c
index 8b6892f959ab..21d273e69951 100644
--- a/tools/lib/string.c
+++ b/tools/lib/string.c
@@ -153,6 +153,19 @@ char *strim(char *s)
return skip_spaces(s);
 }
 
+/*
+ * remove_spaces - Removes whitespaces from @s
+ */
+void remove_spaces(char *s)
+{
+   char *d = s;
+   do {
+   while (*d == ' ') {
+   ++d;
+   }
+   } while ((*s++ = *d++));
+}
+
 /**
  * strreplace - Replace all occurrences of character in string.
  * @s: The string to operate on.
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c 
b/tools/perf/arch/powerpc/util/dwarf-regs.c
index 0c4f4caf53ac..e60a71fd846e 100644
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -98,3 +98,22 @@ int regs_query_register_offset(const char *name)
return roff->ptregs_offset;
return -EINVAL;
 }
+
+#definePPC_OP(op)  (((op) >> 26) & 0x3F)
+#define PPC_RA(a)  (((a) >> 16) & 0x1f)
+#define PPC_RT(t)  (((t) >> 21) & 0x1f)
+
+int get_opcode_insn(unsigned int raw_insn)
+{
+   return PPC_OP(raw_insn);
+}
+
+int get_source_reg(unsigned int raw_insn)
+{
+   return PPC_RA(raw_insn);
+}
+
+int get_target_reg(unsigned int raw_insn)
+{
+   return PPC_RT(raw_insn);
+}
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index 2de66a092cab..85692f73e78f 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -43,7 +43,7 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t 
size,
   struct ins_operands *ops, int max_ins_name);
 
 static void ins__sort(struct arch *arch);
-static int disasm_line__parse(char *line, const char **namep, char **rawp);
+static int disasm_line__parse(char *line, const char **namep, char **rawp, int 
*opcode, int *rawp_insn);
 
 static __attribute__((constructor)) void symbol__init_regexpr(void)
 {
@@ -512,7 +512,7 @@ static 

[PATCH V2 9/9] tools/perf: Add support for global_die to capture name of variable in case of register defined variable

2024-05-06 Thread Athira Rajeev
In case of register defined variable (found using
find_data_type_global_reg), if the type of variable happens to be base
type (example, long unsigned int), perf report captures it as:

12.85%  long unsigned int  long unsigned int +0 (no field)

The above data type is actually referring to samples captured while
accessing "r1" which represents current stack pointer in powerpc.
register void *__stack_pointer asm("r1");

The dwarf debug contains this as:

<<>>
 <1><18dd772>: Abbrev Number: 129 (DW_TAG_variable)
<18dd774>   DW_AT_name: (indirect string, offset: 0x11ba): 
current_stack_pointer
<18dd778>   DW_AT_decl_file   : 51
<18dd779>   DW_AT_decl_line   : 1468
<18dd77b>   DW_AT_decl_column : 24
<18dd77c>   DW_AT_type: <0x18da5cd>
<18dd780>   DW_AT_external: 1
<18dd780>   DW_AT_location: 1 byte block: 51(DW_OP_reg1 (r1))

 where 18da5cd is:

 <1><18da5cd>: Abbrev Number: 47 (DW_TAG_base_type)
<18da5ce>   DW_AT_byte_size   : 8
<18da5cf>   DW_AT_encoding: 7   (unsigned)
<18da5d0>   DW_AT_name: (indirect string, offset: 0x55c7): long 
unsigned int
<<>>

To make it more clear to the user, capture the DW_AT_name of the
variable and save it as part of Dwarf_Global. Dwarf_Global is used so
that it can be used and retrieved while presenting the result.

Update "dso__findnew_data_type" function to set "var_name" if
variable name is set as part of Dwarf_Global. Updated
"hist_entry__typeoff_snprintf" to print var_name if it is set.
With the changes, along with "long unsigned int" report also says the
variable name as current_stack_pointer

Snippet of result:

12.85%  long unsigned int  long unsigned int +0 (current_stack_pointer)
 4.68%  struct paca_struct  struct paca_struct +2312 (__current)
 4.57%  struct paca_struct  struct paca_struct +2354 (irq_soft_mask)

Signed-off-by: Athira Rajeev 
---
 tools/perf/util/annotate-data.c | 30 --
 tools/perf/util/dwarf-aux.c |  1 +
 tools/perf/util/dwarf-aux.h |  1 +
 tools/perf/util/sort.c  |  7 +--
 4 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index ab2168c4ef41..9f72d4b6a5f4 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -267,23 +267,32 @@ static void delete_members(struct annotated_member 
*member)
 }
 
 static struct annotated_data_type *dso__findnew_data_type(struct dso *dso,
- Dwarf_Die *type_die)
+ Dwarf_Die *type_die, 
Dwarf_Global *global_die)
 {
struct annotated_data_type *result = NULL;
struct annotated_data_type key;
struct rb_node *node;
struct strbuf sb;
+   struct strbuf sb_var_name;
char *type_name;
+   char *var_name;
Dwarf_Word size;
 
strbuf_init(&sb, 32);
+   strbuf_init(&sb_var_name, 32);
if (die_get_typename_from_type(type_die, &sb) < 0)
strbuf_add(&sb, "(unknown type)", 14);
+   if (global_die->name) {
+   strbuf_addstr(&sb_var_name, global_die->name);
+   var_name = strbuf_detach(&sb_var_name, NULL);
+   }
type_name = strbuf_detach(&sb, NULL);
dwarf_aggregate_size(type_die, &size);
 
/* Check existing nodes in dso->data_types tree */
key.self.type_name = type_name;
+   if (global_die->name)
+   key.self.var_name = var_name;
key.self.size = size;
node = rb_find(&key, &dso->data_types, data_type_cmp);
if (node) {
@@ -300,6 +309,8 @@ static struct annotated_data_type 
*dso__findnew_data_type(struct dso *dso,
}
 
result->self.type_name = type_name;
+   if (global_die->name)
+   result->self.var_name = var_name;
result->self.size = size;
INIT_LIST_HEAD(&result->self.children);
 
@@ -1177,7 +1188,7 @@ static int find_data_type_block(struct data_loc_info 
*dloc,
  * cu_die and match with reg to identify data type die.
  */
 static int find_data_type_global_reg(struct data_loc_info *dloc, int reg, 
Dwarf_Die *cu_die,
-   Dwarf_Die *type_die)
+   Dwarf_Die *type_die, Dwarf_Global *global_die)
 {
Dwarf_Die vr_die;
int ret = -1;
@@ -1189,8 +1200,11 @@ static int find_data_type_global_reg(struct 
data_loc_info *dloc, int reg, Dwarf_
if (dwarf_offdie(dloc->di->dbg, var_types->die_off, 
&vr_die)) {
if (die_get_real_type(&vr_die, type_die) == 
NULL) {
dloc->type_offset = 0;
+   global_die->name = var_types->name;
dwarf_offdie(dloc->di->dbg, 
var_types->die_off, type_die);
}
+   global_die->d

[PATCH V2 8/9] tools/perf: Add support to find global register variables using find_data_type_global_reg

2024-05-06 Thread Athira Rajeev
There are cases where a global register variable is defined and associated
with a specified register. For example, in powerpc, two registers are
defined to represent variables:
1. r13: represents local_paca
register struct paca_struct *local_paca asm("r13");

2. r1: represents stack_pointer
register void *__stack_pointer asm("r1");

These regs are present in dwarf debug as DW_OP_reg as part of variables
in the cu_die (compile unit). These are not present in die search done
in the list of nested scopes since these are global register variables.

Example for local_paca represented by r13:

<<>>
 <1><18dc6b4>: Abbrev Number: 128 (DW_TAG_variable)
<18dc6b6>   DW_AT_name: (indirect string, offset: 0x3861): 
local_paca
<18dc6ba>   DW_AT_decl_file   : 48
<18dc6bb>   DW_AT_decl_line   : 36
<18dc6bc>   DW_AT_decl_column : 30
<18dc6bd>   DW_AT_type: <0x18dc6c3>
<18dc6c1>   DW_AT_external: 1
<18dc6c1>   DW_AT_location: 1 byte block: 5d(DW_OP_reg13 (r13))

 <1><18dc6c3>: Abbrev Number: 3 (DW_TAG_pointer_type)
<18dc6c4>   DW_AT_byte_size   : 8
<18dc6c4>   DW_AT_type: <0x18dc353>

Where  DW_AT_type : <0x18dc6c3> further points to :

 <1><18dc6c3>: Abbrev Number: 3 (DW_TAG_pointer_type)
<18dc6c4>   DW_AT_byte_size   : 8
<18dc6c4>   DW_AT_type: <0x18dc353>

which belongs to:

 <1><18dc353>: Abbrev Number: 67 (DW_TAG_structure_type)
<18dc354>   DW_AT_name: (indirect string, offset: 0x56cd): 
paca_struct
<18dc358>   DW_AT_byte_size   : 2944
<18dc35a>   DW_AT_alignment   : 128
<18dc35b>   DW_AT_decl_file   : 48
<18dc35c>   DW_AT_decl_line   : 61
<18dc35d>   DW_AT_decl_column : 8
<18dc35d>   DW_AT_sibling : <0x18dc6b4>
<<>>

Similar is case with "r1".

<<>>
 <1><18dd772>: Abbrev Number: 129 (DW_TAG_variable)
<18dd774>   DW_AT_name: (indirect string, offset: 0x11ba): 
current_stack_pointer
<18dd778>   DW_AT_decl_file   : 51
<18dd779>   DW_AT_decl_line   : 1468
<18dd77b>   DW_AT_decl_column : 24
<18dd77c>   DW_AT_type: <0x18da5cd>
<18dd780>   DW_AT_external: 1
<18dd780>   DW_AT_location: 1 byte block: 51(DW_OP_reg1 (r1))

 where 18da5cd is:

 <1><18da5cd>: Abbrev Number: 47 (DW_TAG_base_type)
<18da5ce>   DW_AT_byte_size   : 8
<18da5cf>   DW_AT_encoding: 7   (unsigned)
<18da5d0>   DW_AT_name: (indirect string, offset: 0x55c7): long 
unsigned int
<<>>

To identify data type for these two special cases, iterate over
variables in the CU die (Compile Unit) and match it with the register.
If the variable is a base type, ie die_get_real_type will return NULL
here, set offset to zero. With the changes, data type for "paca_struct"
and "long unsigned int" for r1 is identified.

Snippet from ./perf report -s type,type_off

12.85%  long unsigned int  long unsigned int +0 (no field)
 4.68%  struct paca_struct  struct paca_struct +2312 (__current)
 4.57%  struct paca_struct  struct paca_struct +2354 (irq_soft_mask)

Signed-off-by: Athira Rajeev 
---
 tools/perf/util/annotate-data.c  | 40 
 tools/perf/util/annotate.c   |  8 ++
 tools/perf/util/annotate.h   |  1 +
 tools/perf/util/include/dwarf-regs.h |  1 +
 4 files changed, 50 insertions(+)

diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index e22ba35c93b2..ab2168c4ef41 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -1169,6 +1169,40 @@ static int find_data_type_block(struct data_loc_info 
*dloc,
return ret;
 }
 
+/*
+ * Handle cases where define a global register variable and
+ * associate it with a specified register. These regs are
+ * present in dwarf debug as DW_OP_reg as part of variables
+ * in the cu_die (compile unit). Iterate over variables in the
+ * cu_die and match with reg to identify data type die.
+ */
+static int find_data_type_global_reg(struct data_loc_info *dloc, int reg, 
Dwarf_Die *cu_die,
+   Dwarf_Die *type_die)
+{
+   Dwarf_Die vr_die;
+   int ret = -1;
+   struct die_var_type *var_types = NULL;
+
+   die_collect_vars(cu_die, &var_types);
+   while (var_types) {
+   if (var_types->reg == reg) {
+   if (dwarf_offdie(dloc->di->dbg, var_types->die_off, 
&vr_die)) {
+   if (die_get_real_type(&vr_die, type_die) == 
NULL) {
+   dloc->type_offset = 0;
+   dwarf_offdie(dloc->di->dbg, 
var_types->die_off, type_die);
+   }
+   pr_debug_type_name(type_die, TSR_KIND_TYPE);
+   ret = 0;
+   pr_debug_dtp("found by CU for %s (die:%#lx)\n",
+   dwarf_diename(type_die), 
(long)dwarf_dieoffset(type_die));
+   }
+  

[PATCH V2 7/9] tools/perf: Update instruction tracking with add instruction

2024-05-06 Thread Athira Rajeev
Update instruction tracking with the add instruction. Apart from the "mr"
instruction, the register state is carried on by other insns, i.e.,
"add, addi, addis". Since these are not memory instructions and don't
fall in the opcode range of (32 to 63), add these as part of the mnemonic table.
For now, add* instructions are added. There is a possibility of more
being added here. But to extract regs, the binary code will still be
used. So associate these with "load_store_ops" itself; no other
changes are required.

Signed-off-by: Athira Rajeev 
---
 .../perf/arch/powerpc/annotate/instructions.c | 21 +++
 tools/perf/util/disasm.c  |  1 +
 2 files changed, 22 insertions(+)

diff --git a/tools/perf/arch/powerpc/annotate/instructions.c 
b/tools/perf/arch/powerpc/annotate/instructions.c
index cce7023951fe..1f35d8a65bb4 100644
--- a/tools/perf/arch/powerpc/annotate/instructions.c
+++ b/tools/perf/arch/powerpc/annotate/instructions.c
@@ -1,6 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
 #include 
 
+/*
+ * powerpc instruction nmemonic table to associate load/store instructions with
+ * move_ops. mov_ops is used to identify add/mr to do instruction tracking.
+ */
+static struct ins powerpc__instructions[] = {
+   { .name = "mr", .ops = &load_store_ops,  },
+   { .name = "addi",   .ops = &load_store_ops,   },
+   { .name = "addis",  .ops = &load_store_ops,  },
+   { .name = "add",.ops = &load_store_ops,  },
+};
+
 static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, 
const char *name)
 {
int i;
@@ -75,6 +86,9 @@ static void update_insn_state_powerpc(struct type_state 
*state,
if (annotate_get_insn_location(dloc->arch, dl, &loc) < 0)
return;
 
+   if (!strncmp(dl->ins.name, "add", 3))
+   goto regs_check;
+
if (strncmp(dl->ins.name, "mr", 2))
return;
 
@@ -85,6 +99,7 @@ static void update_insn_state_powerpc(struct type_state 
*state,
dst->reg1 = src_reg;
}
 
+regs_check:
if (!has_reg_type(state, dst->reg1))
return;
 
@@ -115,6 +130,12 @@ static void update_insn_state_powerpc(struct type_state 
*state __maybe_unused, s
 static int powerpc__annotate_init(struct arch *arch, char *cpuid 
__maybe_unused)
 {
if (!arch->initialized) {
+   arch->nr_instructions = ARRAY_SIZE(powerpc__instructions);
+   arch->instructions = calloc(arch->nr_instructions, 
sizeof(struct ins));
+   if (!arch->instructions)
+   return -ENOMEM;
+   memcpy(arch->instructions, powerpc__instructions, sizeof(struct 
ins) * arch->nr_instructions);
+   arch->nr_instructions_allocated = arch->nr_instructions;
arch->initialized = true;
arch->associate_instruction_ops = 
powerpc__associate_instruction_ops;
arch->objdump.comment_char  = '#';
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index ac6b8b8da38a..32cf506a9010 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -36,6 +36,7 @@ static struct ins_ops mov_ops;
 static struct ins_ops nop_ops;
 static struct ins_ops lock_ops;
 static struct ins_ops ret_ops;
+static struct ins_ops load_store_ops;
 
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
   struct ins_operands *ops, int max_ins_name);
-- 
2.43.0



[PATCH V2 6/9] tools/perf: Update instruction tracking for powerpc

2024-05-06 Thread Athira Rajeev
Add instruction tracking function "update_insn_state_powerpc" for
powerpc. Example sequence in powerpc:

ld  r10,264(r3)
mr  r31,r3
<
ld  r9,312(r31)

Consider the sample is pointing to: "ld r9,312(r31)".
Here the memory reference is hit at "312(r31)" where 312 is the offset
and r31 is the source register. Previous instruction sequence shows that
register state of r3 is moved to r31. So to identify the data type for r31
access, the previous instruction ("mr") needs to be tracked and the
state type entry has to be updated. Current instruction tracking support
in perf tools infrastructure is specific to x86. Patch adds this for
powerpc and adds "mr" instruction to be tracked.

Signed-off-by: Athira Rajeev 
---
 .../perf/arch/powerpc/annotate/instructions.c | 63 +++
 tools/perf/util/annotate-data.c   |  9 ++-
 tools/perf/util/disasm.c  |  1 +
 3 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/powerpc/annotate/instructions.c 
b/tools/perf/arch/powerpc/annotate/instructions.c
index a3f423c27cae..cce7023951fe 100644
--- a/tools/perf/arch/powerpc/annotate/instructions.c
+++ b/tools/perf/arch/powerpc/annotate/instructions.c
@@ -49,6 +49,69 @@ static struct ins_ops 
*powerpc__associate_instruction_ops(struct arch *arch, con
return ops;
 }
 
+/*
+ * Instruction tracking function to track register state moves.
+ * Example sequence:
+ *ld  r10,264(r3)
+ *mr  r31,r3
+ *<
+ *ld  r9,312(r31)
+ *
+ * Previous instruction sequence shows that register state of r3
+ * is moved to r31. update_insn_state_powerpc tracks these state
+ * changes
+ */
+#ifdef HAVE_DWARF_SUPPORT
+static void update_insn_state_powerpc(struct type_state *state,
+   struct data_loc_info *dloc, Dwarf_Die *cu_die __maybe_unused,
+   struct disasm_line *dl)
+{
+   struct annotated_insn_loc loc;
+   struct annotated_op_loc *src = &loc.ops[INSN_OP_SOURCE];
+   struct annotated_op_loc *dst = &loc.ops[INSN_OP_TARGET];
+   struct type_state_reg *tsr;
+   u32 insn_offset = dl->al.offset;
+
+   if (annotate_get_insn_location(dloc->arch, dl, &loc) < 0)
+   return;
+
+   if (strncmp(dl->ins.name, "mr", 2))
+   return;
+
+   if (!strncmp(dl->ins.name, "mr", 2)) {
+   int src_reg = src->reg1;
+
+   src->reg1 = dst->reg1;
+   dst->reg1 = src_reg;
+   }
+
+   if (!has_reg_type(state, dst->reg1))
+   return;
+
+   tsr = &state->regs[dst->reg1];
+
+   if (!has_reg_type(state, src->reg1) ||
+   !state->regs[src->reg1].ok) {
+   tsr->ok = false;
+   return;
+   }
+
+   tsr->type = state->regs[src->reg1].type;
+   tsr->kind = state->regs[src->reg1].kind;
+   tsr->ok = true;
+
+   pr_debug("mov [%x] reg%d -> reg%d",
+   insn_offset, src->reg1, dst->reg1);
+   pr_debug_type_name(&tsr->type, tsr->kind);
+}
+#else /* HAVE_DWARF_SUPPORT */
+static void update_insn_state_powerpc(struct type_state *state __maybe_unused, 
struct data_loc_info *dloc __maybe_unused,
+   Dwarf_Die *cu_die __maybe_unused, struct disasm_line *dl 
__maybe_unused)
+{
+   return;
+}
+#endif /* HAVE_DWARF_SUPPORT */
+
 static int powerpc__annotate_init(struct arch *arch, char *cpuid 
__maybe_unused)
 {
if (!arch->initialized) {
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 9d6d4f472c85..e22ba35c93b2 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -1079,6 +1079,13 @@ static int find_data_type_insn(struct data_loc_info 
*dloc,
return ret;
 }
 
+static int arch_supports_insn_tracking(struct data_loc_info *dloc)
+{
+   if ((arch__is(dloc->arch, "x86")) || (arch__is(dloc->arch, "powerpc")))
+   return 1;
+   return 0;
+}
+
 /*
  * Construct a list of basic blocks for each scope with variables and try to 
find
  * the data type by updating a type state table through instructions.
@@ -1093,7 +1100,7 @@ static int find_data_type_block(struct data_loc_info 
*dloc,
int ret = -1;
 
/* TODO: other architecture support */
-   if (!arch__is(dloc->arch, "x86"))
+   if (!arch_supports_insn_tracking(dloc))
return -1;
 
prev_dst_ip = dst_ip = dloc->ip;
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index f41a0fadeab4..ac6b8b8da38a 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -151,6 +151,7 @@ static struct arch architectures[] = {
{
.name = "powerpc",
.init = powerpc__annotate_init,
+   .update_insn_state = update_insn_state_powerpc,
},
{
.name = "riscv64",
-- 
2.43.0



[PATCH V2 5/9] tools/perf: Update parameters for reg extract functions to use raw instruction on powerpc

2024-05-06 Thread Athira Rajeev
Use the raw instruction code and macros to identify memory instructions,
extract register fields and also offset. The implementation addresses
the D-form, X-form, DS-form instructions. Two main functions are added.
New parse function "load_store__parse" as instruction ops parser for
memory instructions. Unlike other parsers (like mov__parse), this parser
fills in only the "raw" field for source/target and the newly added "mem_ref"
field. This is because there is no need to parse the disassembled
code here; arch specific macros will take care of extracting the offset and
regs, which is easier and more precise.

In powerpc, all instructions with a primary opcode from 32 to 63
are memory instructions. Update "ins__find" function to have "opcode"
also as a parameter. Don't use "extract_reg_offset"; instead use the
newly added function "get_arch_regs", which will set these fields: reg1,
reg2, offset, depending on whether it is a source or target op.

Signed-off-by: Athira Rajeev 
---
 tools/perf/arch/powerpc/util/dwarf-regs.c | 33 +
 tools/perf/util/annotate.c| 22 -
 tools/perf/util/disasm.c  | 59 +--
 tools/perf/util/disasm.h  |  4 +-
 tools/perf/util/include/dwarf-regs.h  |  4 +-
 5 files changed, 114 insertions(+), 8 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c 
b/tools/perf/arch/powerpc/util/dwarf-regs.c
index e60a71fd846e..3121c70dc0d3 100644
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -102,6 +102,9 @@ int regs_query_register_offset(const char *name)
 #definePPC_OP(op)  (((op) >> 26) & 0x3F)
 #define PPC_RA(a)  (((a) >> 16) & 0x1f)
 #define PPC_RT(t)  (((t) >> 21) & 0x1f)
+#define PPC_RB(b)  (((b) >> 11) & 0x1f)
+#define PPC_D(D)   ((D) & 0xfffe)
+#define PPC_DS(DS) ((DS) & 0xfffc)
 
 int get_opcode_insn(unsigned int raw_insn)
 {
@@ -117,3 +120,33 @@ int get_target_reg(unsigned int raw_insn)
 {
return PPC_RT(raw_insn);
 }
+
+int get_offset_opcode(int raw_insn __maybe_unused)
+{
+   int opcode = PPC_OP(raw_insn);
+
+   /* DS- form */
+   if ((opcode == 58) || (opcode == 62))
+   return PPC_DS(raw_insn);
+   else
+   return PPC_D(raw_insn);
+}
+
+/*
+ * Fills the required fields for op_loc depending on if it
+ * is a source of target.
+ * D form: ins RT,D(RA) -> src_reg1 = RA, offset = D, dst_reg1 = RT
+ * DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT
+ * X form: ins RT,RA,RB -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT
+ */
+void get_arch_regs(int raw_insn __maybe_unused, int is_source __maybe_unused, 
struct annotated_op_loc *op_loc __maybe_unused)
+{
+   if (is_source)
+   op_loc->reg1 = get_source_reg(raw_insn);
+   else
+   op_loc->reg1 = get_target_reg(raw_insn);
+   if (op_loc->multi_regs)
+   op_loc->reg2 = PPC_RB(raw_insn);
+   if (op_loc->mem_ref)
+   op_loc->offset = get_offset_opcode(raw_insn);
+}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0f5e10654d09..48739c7ffdc7 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -2073,6 +2073,12 @@ static int extract_reg_offset(struct arch *arch, const 
char *str,
return 0;
 }
 
+__weak void get_arch_regs(int raw_insn __maybe_unused, int is_source 
__maybe_unused,
+   struct annotated_op_loc *op_loc __maybe_unused)
+{
+   return;
+}
+
 /**
  * annotate_get_insn_location - Get location of instruction
  * @arch: the architecture info
@@ -2117,10 +2123,12 @@ int annotate_get_insn_location(struct arch *arch, 
struct disasm_line *dl,
for_each_insn_op_loc(loc, i, op_loc) {
const char *insn_str = ops->source.raw;
bool multi_regs = ops->source.multi_regs;
+   bool mem_ref = ops->source.mem_ref;
 
if (i == INSN_OP_TARGET) {
insn_str = ops->target.raw;
multi_regs = ops->target.multi_regs;
+   mem_ref = ops->target.mem_ref;
}
 
/* Invalidate the register by default */
@@ -2130,7 +2138,19 @@ int annotate_get_insn_location(struct arch *arch, struct 
disasm_line *dl,
if (insn_str == NULL)
continue;
 
-   if (strchr(insn_str, arch->objdump.memory_ref_char)) {
+   /*
+* For powerpc, call get_arch_regs function which extracts the
+* required fields for op_loc, ie reg1, reg2, offset from the
+* raw instruction.
+*/
+   if (arch__is(arch, "powerpc")) {
+   op_loc->mem_ref = mem_ref;
+   if ((!strchr(insn_str, '(')) && (i == INSN_OP_SOURCE))
+   op_loc->multi_regs = true;
+   get_arch_reg

[PATCH V2 2/9] tools/perf: Add "update_insn_state" callback function to handle arch specific instruction tracking

2024-05-06 Thread Athira Rajeev
Add "update_insn_state" callback to "struct arch" to handle instruction
tracking. Currently updating instruction state is handled by static
function "update_insn_state_x86" which is defined in "annotate-data.c".
Make this a callback for each arch and move it to the arch specific
file "arch/x86/annotate/instructions.c". This will help to add helper
functions for other platforms in the file
"arch/<arch>/annotate/instructions.c" and make changes/updates
easier.

Define callback "update_insn_state" as part of "struct arch", also make
some of the debug functions non-static so that it can be referenced from
other places.

Signed-off-by: Athira Rajeev 
---
 tools/perf/arch/x86/annotate/instructions.c | 383 +++
 tools/perf/util/annotate-data.c | 391 +---
 tools/perf/util/annotate-data.h |  23 ++
 tools/perf/util/disasm.c|   2 +
 tools/perf/util/disasm.h|   7 +
 5 files changed, 423 insertions(+), 383 deletions(-)

diff --git a/tools/perf/arch/x86/annotate/instructions.c 
b/tools/perf/arch/x86/annotate/instructions.c
index 5cdf457f5cbe..cd2fa59a8034 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -206,3 +206,386 @@ static int x86__annotate_init(struct arch *arch, char 
*cpuid)
arch->initialized = true;
return err;
 }
+
+#ifdef HAVE_DWARF_SUPPORT
+static void update_insn_state_x86(struct type_state *state,
+ struct data_loc_info *dloc, Dwarf_Die *cu_die,
+ struct disasm_line *dl)
+{
+   struct annotated_insn_loc loc;
+   struct annotated_op_loc *src = &loc.ops[INSN_OP_SOURCE];
+   struct annotated_op_loc *dst = &loc.ops[INSN_OP_TARGET];
+   struct type_state_reg *tsr;
+   Dwarf_Die type_die;
+   u32 insn_offset = dl->al.offset;
+   int fbreg = dloc->fbreg;
+   int fboff = 0;
+
+   if (annotate_get_insn_location(dloc->arch, dl, &loc) < 0)
+   return;
+
+   if (ins__is_call(&dl->ins)) {
+   struct symbol *func = dl->ops.target.sym;
+
+   if (func == NULL)
+   return;
+
+   /* __fentry__ will preserve all registers */
+   if (!strcmp(func->name, "__fentry__"))
+   return;
+
+   pr_debug_dtp("call [%x] %s\n", insn_offset, func->name);
+
+   /* Otherwise invalidate caller-saved registers after call */
+   for (unsigned i = 0; i < ARRAY_SIZE(state->regs); i++) {
+   if (state->regs[i].caller_saved)
+   state->regs[i].ok = false;
+   }
+
+   /* Update register with the return type (if any) */
+   if (die_find_func_rettype(cu_die, func->name, &type_die)) {
+   tsr = &state->regs[state->ret_reg];
+   tsr->type = type_die;
+   tsr->kind = TSR_KIND_TYPE;
+   tsr->ok = true;
+
+   pr_debug_dtp("call [%x] return -> reg%d",
+insn_offset, state->ret_reg);
+   pr_debug_type_name(&type_die, tsr->kind);
+   }
+   return;
+   }
+
+   if (!strncmp(dl->ins.name, "add", 3)) {
+   u64 imm_value = -1ULL;
+   int offset;
+   const char *var_name = NULL;
+   struct map_symbol *ms = dloc->ms;
+   u64 ip = ms->sym->start + dl->al.offset;
+
+   if (!has_reg_type(state, dst->reg1))
+   return;
+
+   tsr = &state->regs[dst->reg1];
+
+   if (src->imm)
+   imm_value = src->offset;
+   else if (has_reg_type(state, src->reg1) &&
+state->regs[src->reg1].kind == TSR_KIND_CONST)
+   imm_value = state->regs[src->reg1].imm_value;
+   else if (src->reg1 == DWARF_REG_PC) {
+   u64 var_addr = annotate_calc_pcrel(dloc->ms, ip,
+  src->offset, dl);
+
+   if (get_global_var_info(dloc, var_addr,
+   &var_name, &offset) &&
+   !strcmp(var_name, "this_cpu_off") &&
+   tsr->kind == TSR_KIND_CONST) {
+   tsr->kind = TSR_KIND_PERCPU_BASE;
+   imm_value = tsr->imm_value;
+   }
+   }
+   else
+   return;
+
+   if (tsr->kind != TSR_KIND_PERCPU_BASE)
+   return;
+
+   if (get_global_var_type(cu_die, dloc, ip, imm_value, &offset,
+   &type_die) && offset == 0) {
+   /*
+* This is not 

[PATCH V2 3/9] tools/perf: Fix a comment about multi_regs in extract_reg_offset function

2024-05-06 Thread Athira Rajeev
Fix a comment in function which explains how multi_regs field gets set
for an instruction. In the example, "mov  %rsi, 8(%rbx,%rcx,4)", the
comment mistakenly referred to "dst_multi_regs = 0". Correct it to use
"src_multi_regs = 0"

Signed-off-by: Athira Rajeev 
---
 tools/perf/util/annotate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index f5b6b5e5e757..0f5e10654d09 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -2093,7 +2093,7 @@ static int extract_reg_offset(struct arch *arch, const 
char *str,
  *   mov  0x18, %r8  # src_reg1 = -1, src_mem = 0
  *   # dst_reg1 = r8, dst_mem = 0
  *
- *   mov  %rsi, 8(%rbx,%rcx,4)  # src_reg1 = rsi, src_mem = 0, dst_multi_regs 
= 0
+ *   mov  %rsi, 8(%rbx,%rcx,4)  # src_reg1 = rsi, src_mem = 0, src_multi_regs 
= 0
  *  # dst_reg1 = rbx, dst_reg2 = rcx, dst_mem = 1
  *  # dst_multi_regs = 1, dst_offset = 8
  */
-- 
2.43.0



[PATCH V2 1/9] tools/perf: Move the data structures related to register type to header file

2024-05-06 Thread Athira Rajeev
Data type profiling uses instruction tracking by checking each
instruction and updating the register type state in some data
structures. This is useful to find the data type in cases when the
register state gets transferred from one reg to another. Example, in
x86, "mov" instruction and in powerpc, "mr" instruction. Currently these
structures are defined in annotate-data.c and instruction tracking is
implemented only for x86. Move these data structures to
"annotate-data.h" header file so that other arch implementations can use
it in arch specific files as well.

Signed-off-by: Athira Rajeev 
---
 tools/perf/util/annotate-data.c | 53 +--
 tools/perf/util/annotate-data.h | 55 +
 2 files changed, 56 insertions(+), 52 deletions(-)

diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index 2c98813f95cd..e812dec09c99 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -30,15 +30,6 @@
 
 static void delete_var_types(struct die_var_type *var_types);
 
-enum type_state_kind {
-   TSR_KIND_INVALID = 0,
-   TSR_KIND_TYPE,
-   TSR_KIND_PERCPU_BASE,
-   TSR_KIND_CONST,
-   TSR_KIND_POINTER,
-   TSR_KIND_CANARY,
-};
-
 #define pr_debug_dtp(fmt, ...) \
 do {   \
if (debug_type_profile) \
@@ -139,49 +130,7 @@ static void pr_debug_location(Dwarf_Die *die, u64 pc, int 
reg)
}
 }
 
-/*
- * Type information in a register, valid when @ok is true.
- * The @caller_saved registers are invalidated after a function call.
- */
-struct type_state_reg {
-   Dwarf_Die type;
-   u32 imm_value;
-   bool ok;
-   bool caller_saved;
-   u8 kind;
-};
-
-/* Type information in a stack location, dynamically allocated */
-struct type_state_stack {
-   struct list_head list;
-   Dwarf_Die type;
-   int offset;
-   int size;
-   bool compound;
-   u8 kind;
-};
-
-/* FIXME: This should be arch-dependent */
-#define TYPE_STATE_MAX_REGS  16
-
-/*
- * State table to maintain type info in each register and stack location.
- * It'll be updated when new variable is allocated or type info is moved
- * to a new location (register or stack).  As it'd be used with the
- * shortest path of basic blocks, it only maintains a single table.
- */
-struct type_state {
-   /* state of general purpose registers */
-   struct type_state_reg regs[TYPE_STATE_MAX_REGS];
-   /* state of stack location */
-   struct list_head stack_vars;
-   /* return value register */
-   int ret_reg;
-   /* stack pointer register */
-   int stack_reg;
-};
-
-static bool has_reg_type(struct type_state *state, int reg)
+bool has_reg_type(struct type_state *state, int reg)
 {
return (unsigned)reg < ARRAY_SIZE(state->regs);
 }
diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h
index 0a57d9f5ee78..ef235b1b15e1 100644
--- a/tools/perf/util/annotate-data.h
+++ b/tools/perf/util/annotate-data.h
@@ -6,6 +6,9 @@
 #include 
 #include 
 #include 
+#include "dwarf-aux.h"
+#include "annotate.h"
+#include "debuginfo.h"
 
 struct annotated_op_loc;
 struct debuginfo;
@@ -15,6 +18,15 @@ struct hist_entry;
 struct map_symbol;
 struct thread;
 
+enum type_state_kind {
+   TSR_KIND_INVALID = 0,
+   TSR_KIND_TYPE,
+   TSR_KIND_PERCPU_BASE,
+   TSR_KIND_CONST,
+   TSR_KIND_POINTER,
+   TSR_KIND_CANARY,
+};
+
 /**
  * struct annotated_member - Type of member field
  * @node: List entry in the parent list
@@ -142,6 +154,48 @@ struct annotated_data_stat {
 };
 extern struct annotated_data_stat ann_data_stat;
 
+/*
+ * Type information in a register, valid when @ok is true.
+ * The @caller_saved registers are invalidated after a function call.
+ */
+struct type_state_reg {
+   Dwarf_Die type;
+   u32 imm_value;
+   bool ok;
+   bool caller_saved;
+   u8 kind;
+};
+
+/* Type information in a stack location, dynamically allocated */
+struct type_state_stack {
+   struct list_head list;
+   Dwarf_Die type;
+   int offset;
+   int size;
+   bool compound;
+   u8 kind;
+};
+
+/* FIXME: This should be arch-dependent */
+#define TYPE_STATE_MAX_REGS  32
+
+/*
+ * State table to maintain type info in each register and stack location.
+ * It'll be updated when new variable is allocated or type info is moved
+ * to a new location (register or stack).  As it'd be used with the
+ * shortest path of basic blocks, it only maintains a single table.
+ */
+struct type_state {
+   /* state of general purpose registers */
+   struct type_state_reg regs[TYPE_STATE_MAX_REGS];
+   /* state of stack location */
+   struct list_head stack_vars;
+   /* return value register */
+   int ret_reg;
+   /* stack pointer register */
+   int stack_reg;
+};
+
 #if

[PATCH V2 0/9] Add data type profiling support for powerpc

2024-05-06 Thread Athira Rajeev
The patchset from Namhyung added support for data type profiling
in perf tool. This enabled support to associate PMU samples to data
types they refer to, using DWARF debug information. With the upstream
perf, it is currently possible to run perf report or perf annotate to
view the data type information on x86.

The initial patchset posted here had the changes needed to enable data type
profiling support for powerpc.

https://lore.kernel.org/all/6e09dc28-4a2e-49d8-a2b5-ffb3396a9...@csgroup.eu/T/

Main changes were:
1. powerpc instruction mnemonic table to associate load/store
instructions with move_ops, which is used to identify whether an
instruction is a memory access one.
2. To get register number and access offset from the given
instruction, code uses fields from "struct arch" -> objdump.
Added entry for powerpc here.
3. A get_arch_regnum to return register number from the
register name string.

But the approach used in the initial patchset relied on parsing the
disassembled code, as the current perf tool implementation does.

Example: lwz r10,0(r9)

This line "lwz r10,0(r9)" is parsed to extract instruction name,
registers names and offset. Also to find whether there is a memory
reference in the operands, "memory_ref_char" field of objdump is used.
For x86, "(" is used as memory_ref_char to tackle instructions of the
form "mov  (%rax), %rcx".

In case of powerpc, instructions using "(" are not the only memory
instructions. For example, the above instruction can also be of the extended
form (X form) "lwzx r10,0,r19". In order to easily identify the instruction
category and extract the source/target registers, this patchset adds support
to use the raw instruction. With the raw instruction, macros are added to
extract opcode and register fields.

Example representation using --show-raw-insn in objdump gives result:

38 01 81 e8 ld  r4,312(r1)

Here "38 01 81 e8" is the raw instruction representation. In powerpc,
this translates to instruction form: "ld RT,DS(RA)" and binary code
as:
 ______________________________________
 | 58 |  RT  |  RA  |      DS      |  |
 --------------------------------------
 0    6      11     16             30 31

Patchset adds support to pick the opcode and reg fields from this
raw/binary instruction code. This approach came in from review comment
by Segher Boessenkool for the initial patchset.

Apart from that, instruction tracking is enabled for powerpc and
a support function is added to find variables defined as registers.
For example, in powerpc, two registers are
defined to represent variables:
1. r13: represents local_paca
register struct paca_struct *local_paca asm("r13");

2. r1: represents stack_pointer
register void *__stack_pointer asm("r1");

These are handled in this patchset.

- Patch 1 is to move the register state type structures to a header file
so that they can be referred to from other arch specific files
- Patch 2 is to make instruction tracking a callback in "struct arch"
so that it can be implemented by other archs easily and defined in arch
specific files
- Patch 3 is to fix a small comment
- Patch 4 adds support to capture and parse raw instruction in objdump
by keeping existing approach intact.
- Patch 5 updates parameters for reg extract functions to use raw
instruction on powerpc
- Patch 6 and Patch 7 handle instruction tracking for powerpc.
- Patch 8 and Patch 9 handle support to find global register variables

With the current patchset:

 ./perf record -a -e mem-loads sleep 1
 ./perf report -s type,typeoff --hierarchy --group --stdio
 ./perf annotate --data-type --insn-stat

perf annotate logs:

Annotate Instruction stats
total 562, ok 441 (78.5%), bad 121 (21.5%)

  Name  :  Good   Bad
---
  ld    :   313    54
  lwz   :    51    32
  lbz   :    31     5
  ldx   :     6    21
  lhz   :    23     0
  lwa   : 4 3
  lwarx : 5 0
  lwzx  : 2 2
  ldarx : 3 0
  lwzu  : 2 0
  stdcx.: 0 1
  nop   : 0 1
  ldu   : 1 0
  lbzx  : 0 1
  lwax  : 0 1

perf report logs:

# Samples: 1K of event 'mem-loads'
# Event count (approx.): 937238
#
# Overhead  Data Type  Data Type Offset
#   .  
#
48.81%  (unknown)  (unknown) +0 (no field)
12.85%  long unsigned int  long unsigned int +0 (current_stack_pointer)
 4.68%  struct paca_struct  struct paca_struct +2312 (__current)
 4.57%  struct paca_struct  struct paca_struct +2354 (irq_soft_mask)
 2.68%  struct paca_struct  struct paca_struct +8 (paca_index)
 2.64%  struct paca_struct  struct paca_struct +2808 (canary)
 2.24%  struct paca_struct  struct paca_struct +48 (data_offset)
 1.41%  struct vm_fault  struct vm_fault +0 (vma)
 1.29%  struct task_struct  struct task_struct +276 (flags)
 1.03%  struct pt_regs  struct pt_regs +264 (user_regs.msr)
 1.00%  struct menu_device  struct menu_device +4 (tick_wakeup)
 0

[PATCH] selftests/mm: Fix powerpc ARCH check

2024-05-06 Thread Michael Ellerman
In commit 0518dbe97fe6 ("selftests/mm: fix cross compilation with LLVM")
the logic to detect the machine architecture in the Makefile was
changed to use ARCH, and only fallback to uname -m if ARCH is unset.
However the tests of ARCH were not updated to account for the fact that
ARCH is "powerpc" for powerpc builds, not "ppc64".

Fix it by changing the checks to look for "powerpc", and change the
uname -m logic to convert "ppc64.*" into "powerpc".

With that fixed the following tests now build for powerpc again:
 * protection_keys
 * va_high_addr_switch
 * virtual_address_range
 * write_to_hugetlbfs

Fixes: 0518dbe97fe6 ("selftests/mm: fix cross compilation with LLVM")
Cc: sta...@vger.kernel.org # v6.4+
Signed-off-by: Michael Ellerman 
---
 tools/testing/selftests/mm/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/mm/Makefile 
b/tools/testing/selftests/mm/Makefile
index eb5f39a2668b..410495e0a611 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -12,7 +12,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not)
 else
 uname_M := $(shell echo $(CROSS_COMPILE) | grep -o '^[a-z0-9]\+')
 endif
-ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 
's/ppc64.*/ppc64/')
+ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 
's/ppc64.*/powerpc/')
 endif
 
 # Without this, failed build products remain, with up-to-date timestamps,
@@ -98,13 +98,13 @@ TEST_GEN_FILES += $(BINARIES_64)
 endif
 else
 
-ifneq (,$(findstring $(ARCH),ppc64))
+ifneq (,$(findstring $(ARCH),powerpc))
 TEST_GEN_FILES += protection_keys
 endif
 
 endif
 
-ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x 
sparc64 x86_64))
+ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x 
sparc64 x86_64))
 TEST_GEN_FILES += va_high_addr_switch
 TEST_GEN_FILES += virtual_address_range
 TEST_GEN_FILES += write_to_hugetlbfs
-- 
2.45.0



Re: [kvm-unit-tests PATCH v9 02/31] report: Add known failure reporting option

2024-05-06 Thread Thomas Huth

On 06/05/2024 10.01, Andrew Jones wrote:

On Mon, May 06, 2024 at 09:25:37AM GMT, Thomas Huth wrote:

On 04/05/2024 14.28, Nicholas Piggin wrote:

There are times we would like to test a function that is known to fail
in some conditions due to a bug in implementation (QEMU, KVM, or even
hardware). It would be nice to count these as known failures and not
report a summary failure.

xfail is not the same thing, xfail means failure is required and a pass
causes the test to fail. So add kfail for known failures.


Actually, I wonder whether that's not rather a bug in report_xfail()
instead. Currently, when you call report_xfail(true, ...), the result is
*always* counted as a failure, either as an expected failure (if the test
really failed), or as a normal failure (if the test succeeded). What's the
point of counting a successful test as a failure??

Andrew, you've originally introduced report_xfail in commit a5af7b8a67e,
could you please comment on this?



An expected failure passes when the test fails and fails when the test
passes, i.e.

   XFAIL == PASS (but separately accounted with 'xfailures')
   XPASS == FAIL

If we expect something to fail and it passes then this may be due to the
thing being fixed, so we should change the test to expect success, or
due to the test being written incorrectly for our expectations. Either
way, when an expected failure doesn't fail, it means our expectations are
wrong and we need to be alerted to that, hence a FAIL is reported.


Ok, so this was on purpose, indeed. Maybe we should add this information in 
a comment right in front of the function, so that others don't scratch their 
head, too?


Anyway, this patch here is fine then:
Reviewed-by: Thomas Huth 




Re: [PATCH v15 00/16] Add audio support in v4l2 framework

2024-05-06 Thread Jaroslav Kysela

On 06. 05. 24 10:49, Shengjiu Wang wrote:


Even now I still think V4L2 is the best option, but it looks like there
are a lot of rejects.  If develop a new ALSA-mem2mem, it is also
a duplication of code (bigger duplication that just add audio support
in V4L2 I think).


Maybe not. Could you try to evaluate a pure dma-buf (drivers/dma-buf) solution 
and add only enumeration and operation trigger mechanism to the ALSA API? It 
seems that dma-buf has enough sufficient code to transfer data from and to the 
kernel space for the further processing. I think that one buffer can be as 
source and the second for the processed data.


We can eventually add new ioctls to the ALSA's control API (/dev/snd/control*) 
for this purpose (DSP processing).


Jaroslav

--
Jaroslav Kysela 
Linux Sound Maintainer; ALSA Project; Red Hat, Inc.



Re: [PATCH v4 01/29] powerpc/mm: add ARCH_PKEY_BITS to Kconfig

2024-05-06 Thread Michael Ellerman
Joey Gouly  writes:
> The new config option specifies how many bits are in each PKEY.
>
> Signed-off-by: Joey Gouly 
> Cc: Michael Ellerman 
> Cc: Nicholas Piggin 
> Cc: Christophe Leroy 
> Cc: "Aneesh Kumar K.V" 
> Cc: "Naveen N. Rao" 
> Cc: linuxppc-dev@lists.ozlabs.org
> ---
>  arch/powerpc/Kconfig | 4 
>  1 file changed, 4 insertions(+)

Acked-by: Michael Ellerman  (powerpc)

cheers

> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 1c4be3373686..6e33e4726856 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -1020,6 +1020,10 @@ config PPC_MEM_KEYS
>  
> If unsure, say y.
>  
> +config ARCH_PKEY_BITS
> + int
> + default 5
> +
>  config PPC_SECURE_BOOT
>   prompt "Enable secure boot support"
>   bool
> -- 
> 2.25.1


Re: [PATCH v15 00/16] Add audio support in v4l2 framework

2024-05-06 Thread Shengjiu Wang
On Fri, May 3, 2024 at 4:42 PM Mauro Carvalho Chehab  wrote:
>
> Em Fri, 3 May 2024 10:47:19 +0900
> Mark Brown  escreveu:
>
> > On Thu, May 02, 2024 at 10:26:43AM +0100, Mauro Carvalho Chehab wrote:
> > > Mauro Carvalho Chehab  escreveu:
> >
> > > > There are still time control associated with it, as audio and video
> > > > needs to be in sync. This is done by controlling the buffers size
> > > > and could be fine-tuned by checking when the buffer transfer is done.
> >
> > ...
> >
> > > Just complementing: on media, we do this per video buffer (or
> > > per half video buffer). A typical use case on cameras is to have
> > > buffers transferred 30 times per second, if the video was streamed
> > > at 30 frames per second.
> >
> > IIRC some big use case for this hardware was transcoding so there was a
> > desire to just go at whatever rate the hardware could support as there
> > is no interactive user consuming the output as it is generated.
>
> Indeed, codecs could be used to just do transcoding, but I would
> expect it to be a border use case. See, as the chipsets implementing
> codecs are typically the ones used on mobiles, I would expect that
> the major use cases to be to watch audio and video and to participate
> on audio/video conferences.
>
> Going further, the codec API may end supporting not only transcoding
> (which is something that CPU can usually handle without too much
> processing) but also audio processing that may require more
> complex algorithms - even deep learning ones - like background noise
> removal, echo detection/removal, volume auto-gain, audio enhancement
> and such.
>
> On other words, the typical use cases will either have input
> or output being a physical hardware (microphone or speaker).
>

All, thanks for spending time to discuss, it seems we go back to
the start point of this topic again.

Our main request is that there is a hardware sample rate converter
on the chip, so users can use it in user space as a component like
software sample rate converter. It mostly may run as a gstreamer plugin.
so it is a memory to memory component.

I didn't find such API in ALSA for such purpose, the best option for this
in the kernel is the V4L2 memory to memory framework I found.
As Hans said it is well designed for memory to memory.

And I think audio is one of 'media'.  As I can see that part of Radio
function is in ALSA, part of Radio function is in V4L2. part of HDMI
function is in DRM, part of HDMI function is in ALSA...
So using V4L2 for audio is not new from this point of view.

Even now I still think V4L2 is the best option, but it looks like there
are a lot of objections.  If we develop a new ALSA-mem2mem, it is also
a duplication of code (a bigger duplication than just adding audio support
in V4L2, I think).

Best regards
Shengjiu Wang.

> > > I would assume that, on an audio/video stream, the audio data
> > > transfer will be programmed to also happen on a regular interval.
> >
> > With audio the API is very much "wake userspace every Xms".


Re: [kvm-unit-tests PATCH v9 01/31] doc: update unittests doc

2024-05-06 Thread Andrew Jones
On Sat, May 04, 2024 at 10:28:07PM GMT, Nicholas Piggin wrote:
> This adds a few minor fixes.
> 
> Signed-off-by: Nicholas Piggin 
> ---
>  docs/unittests.txt | 12 +++-
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/docs/unittests.txt b/docs/unittests.txt
> index 3192a60ec..7cf2c55ad 100644
> --- a/docs/unittests.txt
> +++ b/docs/unittests.txt
> @@ -15,8 +15,8 @@ unittests.cfg format
>  
>  # is the comment symbol, all following contents of the line is ignored.
>  
> -Each unit test is defined with a [unit-test-name] line, followed by
> -a set of parameters that control how the test case is run. The name is
> +Each unit test is defined with a [unit-test-name] line, followed by a
> +set of parameters that control how the test case is run. The name is
>  arbitrary and appears in the status reporting output.
>  
>  Parameters appear on their own lines under the test name, and have a
> @@ -62,8 +62,8 @@ groups
>  groups =   ...
>  
>  Used to group the test cases for the `run_tests.sh -g ...` run group
> -option. Adding a test to the nodefault group will cause it to not be
> -run by default.
> +option. The group name is arbitrary, aside from the nodefault group
> +which makes the test to not be run by default.
>  
>  accel
>  -
> @@ -82,8 +82,10 @@ Optional timeout in seconds, after which the test will be 
> killed and fail.
>  
>  check
>  -
> -check = =<
> +check = =
>  
>  Check a file for a particular value before running a test. The check line
>  can contain multiple files to check separated by a space, but each check
>  parameter needs to be of the form =
> +
> +The path and value can not contain space, =, or shell wildcard characters.
> -- 
> 2.43.0
>

Reviewed-by: Andrew Jones 


Re: [kvm-unit-tests PATCH v9 02/31] report: Add known failure reporting option

2024-05-06 Thread Andrew Jones
On Mon, May 06, 2024 at 09:25:37AM GMT, Thomas Huth wrote:
> On 04/05/2024 14.28, Nicholas Piggin wrote:
> > There are times we would like to test a function that is known to fail
> > in some conditions due to a bug in implementation (QEMU, KVM, or even
> > hardware). It would be nice to count these as known failures and not
> > report a summary failure.
> > 
> > xfail is not the same thing, xfail means failure is required and a pass
> > causes the test to fail. So add kfail for known failures.
> 
> Actually, I wonder whether that's not rather a bug in report_xfail()
> instead. Currently, when you call report_xfail(true, ...), the result is
> *always* counted as a failure, either as an expected failure (if the test
> really failed), or as a normal failure (if the test succeeded). What's the
> point of counting a successful test as a failure??
> 
> Andrew, you've originally introduced report_xfail in commit a5af7b8a67e,
> could you please comment on this?
> 

An expected failure passes when the test fails and fails when the test
passes, i.e.

  XFAIL == PASS (but separately accounted with 'xfailures')
  XPASS == FAIL

If we expect something to fail and it passes then this may be due to the
thing being fixed, so we should change the test to expect success, or
due to the test being written incorrectly for our expectations. Either
way, when an expected failure doesn't fail, it means our expectations are
wrong and we need to be alerted to that, hence a FAIL is reported.

Thanks,
drew

> IMHO we should rather do something like this instead:
> 
> diff --git a/lib/report.c b/lib/report.c
> --- a/lib/report.c
> +++ b/lib/report.c
> @@ -98,7 +98,7 @@ static void va_report(const char *msg_fmt,
> skipped++;
> else if (xfail && !pass)
> xfailures++;
> -   else if (xfail || !pass)
> +   else if (!xfail && !pass)
> failures++;
> 
> spin_unlock(&lock);
> 
>  Thomas
> 


Re: [kvm-unit-tests PATCH v9 03/31] powerpc: Mark known failing tests as kfail

2024-05-06 Thread Thomas Huth

On 04/05/2024 14.28, Nicholas Piggin wrote:

Mark the failing h_cede_tm and spapr_vpa tests as kfail.

Signed-off-by: Nicholas Piggin 
---
  powerpc/spapr_vpa.c | 3 ++-
  powerpc/tm.c| 3 ++-
  2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/powerpc/spapr_vpa.c b/powerpc/spapr_vpa.c
index c2075e157..46fa0485c 100644
--- a/powerpc/spapr_vpa.c
+++ b/powerpc/spapr_vpa.c
@@ -150,7 +150,8 @@ static void test_vpa(void)
report_fail("Could not deregister after registration");
  
  	disp_count1 = be32_to_cpu(vpa->vp_dispatch_count);

-   report(disp_count1 % 2 == 1, "Dispatch count is odd after deregister");
+   /* TCG known fail, could be wrong test, must verify against PowerVM */
+   report_kfail(true, disp_count1 % 2 == 1, "Dispatch count is odd after deregister");


Using "true" as first argument looks rather pointless - then you could also 
simply delete the test completely if it can never be tested reliably.


Thus could you please introduce a helper function is_tcg() that could be 
used to check whether we run under TCG (and not KVM)? I think you could 
check for "linux,kvm" in the "compatible" property in /hypervisor in the 
device tree to see whether we're running in KVM mode or in TCG mode.



report_prefix_pop();
  }
diff --git a/powerpc/tm.c b/powerpc/tm.c
index 6b1ceeb6e..d9e7f455d 100644
--- a/powerpc/tm.c
+++ b/powerpc/tm.c
@@ -133,7 +133,8 @@ int main(int argc, char **argv)
report_skip("TM is not available");
goto done;
}
-   report(cpus_with_tm == nr_cpus,
+   /* KVM does not report TM in secondary threads in POWER9 */
+   report_kfail(true, cpus_with_tm == nr_cpus,
   "TM available in all 'ibm,pa-features' properties");


Could you check the PVR for POWER9 here instead of using "true" as first 
parameter?


 Thomas



Re: [kvm-unit-tests PATCH v9 02/31] report: Add known failure reporting option

2024-05-06 Thread Thomas Huth

On 04/05/2024 14.28, Nicholas Piggin wrote:

There are times we would like to test a function that is known to fail
in some conditions due to a bug in implementation (QEMU, KVM, or even
hardware). It would be nice to count these as known failures and not
report a summary failure.

xfail is not the same thing, xfail means failure is required and a pass
causes the test to fail. So add kfail for known failures.


Actually, I wonder whether that's not rather a bug in report_xfail() 
instead. Currently, when you call report_xfail(true, ...), the result is 
*always* counted as a failure, either as an expected failure (if the test 
really failed), or as a normal failure (if the test succeeded). What's the 
point of counting a successful test as a failure??


Andrew, you've originally introduced report_xfail in commit a5af7b8a67e, 
could you please comment on this?


IMHO we should rather do something like this instead:

diff --git a/lib/report.c b/lib/report.c
--- a/lib/report.c
+++ b/lib/report.c
@@ -98,7 +98,7 @@ static void va_report(const char *msg_fmt,
skipped++;
else if (xfail && !pass)
xfailures++;
-   else if (xfail || !pass)
+   else if (!xfail && !pass)
failures++;

spin_unlock(&lock);

 Thomas



Re: [kvm-unit-tests PATCH v9 01/31] doc: update unittests doc

2024-05-06 Thread Thomas Huth

On 04/05/2024 14.28, Nicholas Piggin wrote:

This adds a few minor fixes.

Signed-off-by: Nicholas Piggin 
---
  docs/unittests.txt | 12 +++-
  1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/docs/unittests.txt b/docs/unittests.txt
index 3192a60ec..7cf2c55ad 100644
--- a/docs/unittests.txt
+++ b/docs/unittests.txt
@@ -15,8 +15,8 @@ unittests.cfg format
  
  # is the comment symbol, all following contents of the line is ignored.
  
-Each unit test is defined with a [unit-test-name] line, followed by

-a set of parameters that control how the test case is run. The name is
+Each unit test is defined with a [unit-test-name] line, followed by a
+set of parameters that control how the test case is run. The name is
  arbitrary and appears in the status reporting output.
  
  Parameters appear on their own lines under the test name, and have a

@@ -62,8 +62,8 @@ groups
  groups = <group_name1> <group_name2> ...
  
  Used to group the test cases for the `run_tests.sh -g ...` run group

-option. Adding a test to the nodefault group will cause it to not be
-run by default.
+option. The group name is arbitrary, aside from the nodefault group
+which makes the test to not be run by default.
  
  accel

  -
@@ -82,8 +82,10 @@ Optional timeout in seconds, after which the test will be killed and fail.
  
  check

  -
-check = <path>=<<value>
+check = <path>=<value>
  
  Check a file for a particular value before running a test. The check line

  can contain multiple files to check separated by a space, but each check
  parameter needs to be of the form <path>=<value>
+
+The path and value can not contain space, =, or shell wildcard characters.


Could you comment on my feedback here, please:

 https://lore.kernel.org/kvm/951ccd88-0e39-4379-8d86-718e72594...@redhat.com/

 Thanks,
  Thomas