Re: [patch V4 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup

2023-05-14 Thread Guilherme G. Piccoli
On 12/05/2023 18:06, Thomas Gleixner wrote:
> Hi!
> 
> This is version 4 of the reworked parallel bringup series. Version 3 can be
> found here:
> 
>https://lore.kernel.org/lkml/20230508181633.089804...@linutronix.de


Hi Thomas, thanks for series! I was able to test it on the Steam Deck
(on top of 6.4-rc2), and everything is working fine; also tested S3
suspend/resume, working as expected.

Some logs from boot time:


Parallel boot
[0.239764] smp: Bringing up secondary CPUs ...
[...]
[0.253130] smp: Brought up 1 node, 8 CPUs


Regular boot (with cpuhp.parallel=0)
[0.240093] smp: Bringing up secondary CPUs ...
[...]
[0.253475] smp: Brought up 1 node, 8 CPUs


Feel free to add (to the series):

Tested-by: Guilherme G. Piccoli  # Steam Deck

Cheers,


Guilherme



Re: [patch V4 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup

2023-05-13 Thread Helge Deller

Hi Thomas,

On pátek 12. května 2023 23:06:56 CEST Thomas Gleixner wrote:

This is version 4 of the reworked parallel bringup series. Version 3 can be
found here:

https://lore.kernel.org/lkml/20230508181633.089804...@linutronix.de
...

Other than that there are no changes and the other details are all the same
as in V3 and V2.

It's also available from git:

 git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git hotplug


I tested your series on the parisc architecture just to make sure that it still 
works
with your patch applied.
On parisc the CPU bringup happens later in the boot process (after the 
inventory),
so your patch won't have an direct impact anyway.
But at least everything still works, incl. manual CPU enable/disable.

So, you may add
Tested-by: Helge Deller  # parisc

Thanks!
Helge



Re: [patch V4 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup

2023-05-13 Thread Oleksandr Natalenko
Hello.

On pátek 12. května 2023 23:06:56 CEST Thomas Gleixner wrote:
> Hi!
> 
> This is version 4 of the reworked parallel bringup series. Version 3 can be
> found here:
> 
>https://lore.kernel.org/lkml/20230508181633.089804...@linutronix.de
> 
> This is just a reiteration to address the following details:
> 
>   1) Address review feedback (Peter Zijlstra)
> 
>   2) Fix a MIPS related build problem (0day)
> 
> Other than that there are no changes and the other details are all the same
> as in V3 and V2.
> 
> It's also available from git:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git hotplug
> 
> Diff to V3 below.
> 
> Thanks,
> 
>   tglx

With this patchset:

```

[0.137719] smpboot: Allowing 32 CPUs, 0 hotplug CPUs
[0.777312] smpboot: CPU0: AMD Ryzen 9 5950X 16-Core Processor (family: 
0x19, model: 0x21, stepping: 0x2)
[0.777896] smpboot: Parallel CPU startup enabled: 0x8000
```

Seems to survive suspend/resume cycle too.

Hence:

Tested-by: Oleksandr Natalenko 

Thanks.

> ---
> diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
> index f5e0f4235746..90c71d800b59 100644
> --- a/arch/mips/kernel/smp.c
> +++ b/arch/mips/kernel/smp.c
> @@ -690,7 +690,7 @@ void flush_tlb_one(unsigned long vaddr)
>  EXPORT_SYMBOL(flush_tlb_page);
>  EXPORT_SYMBOL(flush_tlb_one);
>  
> -#ifdef CONFIG_HOTPLUG_CPU
> +#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
>  void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
>  {
>   if (mp_ops->cleanup_dead_cpu)
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 0438802031c3..9cd77d319555 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -290,8 +290,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, 
> SYM_L_GLOBAL)
>  
>   /*  APIC ID not found in the table. Drop the trampoline lock and bail. 
> */
>   movqtrampoline_lock(%rip), %rax
> - lock
> - btrl$0, (%rax)
> + movl$0, (%rax)
>  
>  1:   cli
>   hlt
> @@ -320,8 +319,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, 
> SYM_L_GLOBAL)
>   movqtrampoline_lock(%rip), %rax
>   testq   %rax, %rax
>   jz  .Lsetup_gdt
> - lock
> - btrl$0, (%rax)
> + movl$0, (%rax)
>  
>  .Lsetup_gdt:
>   /*
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 5caf4897b507..660709e94823 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -161,31 +161,28 @@ static inline void 
> smpboot_restore_warm_reset_vector(void)
>  
>  }
>  
> -/*
> - * Report back to the Boot Processor during boot time or to the caller 
> processor
> - * during CPU online.
> - */
> -static void smp_callin(void)
> +/* Run the next set of setup steps for the upcoming CPU */
> +static void ap_starting(void)
>  {
>   int cpuid = smp_processor_id();
>  
>   /*
> -  * If waken up by an INIT in an 82489DX configuration the alive
> -  * synchronization guarantees we don't get here before an
> -  * INIT_deassert IPI reaches our local APIC, so it is now safe to
> -  * touch our local APIC.
> +  * If woken up by an INIT in an 82489DX configuration the alive
> +  * synchronization guarantees that the CPU does not reach this
> +  * point before an INIT_deassert IPI reaches the local APIC, so it
> +  * is now safe to touch the local APIC.
>*
>* Set up this CPU, first the APIC, which is probably redundant on
>* most boards.
>*/
>   apic_ap_setup();
>  
> - /* Save our processor parameters. */
> + /* Save the processor parameters. */
>   smp_store_cpu_info(cpuid);
>  
>   /*
>* The topology information must be up to date before
> -  * calibrate_delay() and notify_cpu_starting().
> +  * notify_cpu_starting().
>*/
>   set_cpu_sibling_map(cpuid);
>  
> @@ -197,7 +194,7 @@ static void smp_callin(void)
>  
>   /*
>* This runs the AP through all the cpuhp states to its target
> -  * state (CPUHP_ONLINE in the case of serial bringup).
> +  * state CPUHP_ONLINE.
>*/
>   notify_cpu_starting(cpuid);
>  }
> @@ -274,10 +271,7 @@ static void notrace start_secondary(void *unused)
>   rcu_cpu_starting(raw_smp_processor_id());
>   x86_cpuinit.early_percpu_clock_init();
>  
> - smp_callin();
> -
> - /* Otherwise gcc will move up smp_processor_id() before cpu_init() */
> - barrier();
> + ap_starting();
>  
>   /* Check TSC synchronization with the control CPU. */
>   check_tsc_sync_target();
> diff --git a/arch/x86/realmode/rm/trampoline_64.S 
> b/arch/x86/realmode/rm/trampoline_64.S
> index 2dfb1c400167..c6de4deec746 100644
> --- a/arch/x86/realmode/rm/trampoline_64.S
> +++ b/arch/x86/realmode/rm/trampoline_64.S
> @@ -40,17 +40,13 @@
>  .macro LOAD_REALMODE_ESP
>   /*
>* Make sure only one CPU fiddles with the realmode stack
> -  */
> + */
>  .Llock_rm\@:
> - btl $0, t

[patch V4 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup

2023-05-12 Thread Thomas Gleixner
Hi!

This is version 4 of the reworked parallel bringup series. Version 3 can be
found here:

   https://lore.kernel.org/lkml/20230508181633.089804...@linutronix.de

This is just a reiteration to address the following details:

  1) Address review feedback (Peter Zijlstra)

  2) Fix a MIPS related build problem (0day)

Other than that there are no changes and the other details are all the same
as in V3 and V2.

It's also available from git:

git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git hotplug

Diff to V3 below.

Thanks,

tglx
---
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index f5e0f4235746..90c71d800b59 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -690,7 +690,7 @@ void flush_tlb_one(unsigned long vaddr)
 EXPORT_SYMBOL(flush_tlb_page);
 EXPORT_SYMBOL(flush_tlb_one);
 
-#ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
 void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
 {
if (mp_ops->cleanup_dead_cpu)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0438802031c3..9cd77d319555 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -290,8 +290,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, 
SYM_L_GLOBAL)
 
/*  APIC ID not found in the table. Drop the trampoline lock and bail. 
*/
movqtrampoline_lock(%rip), %rax
-   lock
-   btrl$0, (%rax)
+   movl$0, (%rax)
 
 1: cli
hlt
@@ -320,8 +319,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, 
SYM_L_GLOBAL)
movqtrampoline_lock(%rip), %rax
testq   %rax, %rax
jz  .Lsetup_gdt
-   lock
-   btrl$0, (%rax)
+   movl$0, (%rax)
 
 .Lsetup_gdt:
/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5caf4897b507..660709e94823 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -161,31 +161,28 @@ static inline void smpboot_restore_warm_reset_vector(void)
 
 }
 
-/*
- * Report back to the Boot Processor during boot time or to the caller 
processor
- * during CPU online.
- */
-static void smp_callin(void)
+/* Run the next set of setup steps for the upcoming CPU */
+static void ap_starting(void)
 {
int cpuid = smp_processor_id();
 
/*
-* If waken up by an INIT in an 82489DX configuration the alive
-* synchronization guarantees we don't get here before an
-* INIT_deassert IPI reaches our local APIC, so it is now safe to
-* touch our local APIC.
+* If woken up by an INIT in an 82489DX configuration the alive
+* synchronization guarantees that the CPU does not reach this
+* point before an INIT_deassert IPI reaches the local APIC, so it
+* is now safe to touch the local APIC.
 *
 * Set up this CPU, first the APIC, which is probably redundant on
 * most boards.
 */
apic_ap_setup();
 
-   /* Save our processor parameters. */
+   /* Save the processor parameters. */
smp_store_cpu_info(cpuid);
 
/*
 * The topology information must be up to date before
-* calibrate_delay() and notify_cpu_starting().
+* notify_cpu_starting().
 */
set_cpu_sibling_map(cpuid);
 
@@ -197,7 +194,7 @@ static void smp_callin(void)
 
/*
 * This runs the AP through all the cpuhp states to its target
-* state (CPUHP_ONLINE in the case of serial bringup).
+* state CPUHP_ONLINE.
 */
notify_cpu_starting(cpuid);
 }
@@ -274,10 +271,7 @@ static void notrace start_secondary(void *unused)
rcu_cpu_starting(raw_smp_processor_id());
x86_cpuinit.early_percpu_clock_init();
 
-   smp_callin();
-
-   /* Otherwise gcc will move up smp_processor_id() before cpu_init() */
-   barrier();
+   ap_starting();
 
/* Check TSC synchronization with the control CPU. */
check_tsc_sync_target();
diff --git a/arch/x86/realmode/rm/trampoline_64.S 
b/arch/x86/realmode/rm/trampoline_64.S
index 2dfb1c400167..c6de4deec746 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -40,17 +40,13 @@
 .macro LOAD_REALMODE_ESP
/*
 * Make sure only one CPU fiddles with the realmode stack
-*/
+   */
 .Llock_rm\@:
-   btl $0, tr_lock
-   jnc 2f
-   pause
-   jmp .Llock_rm\@
+lock btsl   $0, tr_lock
+jnc 2f
+pause
+jmp .Llock_rm\@
 2:
-   lock
-   btsl$0, tr_lock
-   jc  .Llock_rm\@
-
# Setup stack
movl$rm_stack_end, %esp
 .endm
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 60b4093fae9e..005f863a3d2b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -294,14 +294,14 @@ enum cpuhp_sync_state {
  * cpuhp_ap_update_sync_state - Update synchronization state during 
bringup/teardown
  * @state: The synchr