[RFC 6/6] Physical CPU hotadd and S3 SMP support
Boot a CPU at runtime and use it to support S3 SMP. Thanks, Shaohua --- linux-2.6.11-root/arch/i386/kernel/smpboot.c | 79 +++ linux-2.6.11-root/include/asm-i386/smp.h |4 + linux-2.6.11-root/kernel/power/main.c| 30 ++ 3 files changed, 104 insertions(+), 9 deletions(-) diff -puN arch/i386/kernel/smpboot.c~warmboot_cpu arch/i386/kernel/smpboot.c --- linux-2.6.11/arch/i386/kernel/smpboot.c~warmboot_cpu2005-04-04 09:13:48.600255048 +0800 +++ linux-2.6.11-root/arch/i386/kernel/smpboot.c2005-04-04 09:13:48.607253984 +0800 @@ -76,6 +76,12 @@ cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; static cpumask_t smp_commenced_mask; +/* This is ugly, but TSC's upper 32 bits can't be written in earlier CPU + * (before prescott), there is no way to resync one AP against BP + * TBD: for prescott and above, we should use IA64's algorithm + */ +static int __devinit tsc_sync_disabled; + /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; @@ -412,7 +418,7 @@ static void __devinit smp_callin(void) /* * Synchronize the TSC with the BP */ - if (cpu_has_tsc && cpu_khz) + if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) synchronize_tsc_ap(); } @@ -781,8 +787,19 @@ wakeup_secondary_cpu(int phys_apicid, un #endif /* WAKE_SECONDARY_VIA_INIT */ extern cpumask_t cpu_initialized; +static inline int alloc_cpu_id(void) +{ + cpumask_t tmp_map; + int cpu; -static int __devinit do_boot_cpu(int apicid) + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + if (cpu >= NR_CPUS) + return -ENODEV; + return cpu; +} + +static int __devinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 
@@ -791,15 +808,10 @@ static int __devinit do_boot_cpu(int api { struct task_struct *idle; unsigned long boot_error; - int timeout, cpu; + int timeout; unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; - cpumask_t tmp_map; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - if (cpu >= NR_CPUS) - return -ENODEV; ++cpucount; /* * We can't use kernel_thread since we must avoid to @@ -920,6 +932,53 @@ void cpu_exit_clear(int cpu) do_exit_idle(); } + +struct warm_boot_cpu_info { + struct completion *complete; + int apicid; + int cpu; +}; + +static void __devinit do_warm_boot_cpu(void *p) +{ + struct warm_boot_cpu_info *info = p; + do_boot_cpu(info->apicid, info->cpu); + complete(info->complete); +} + +int __devinit smp_prepare_cpu(int apicid) +{ + DECLARE_COMPLETION(done); + struct warm_boot_cpu_info info; + struct work_struct task; + int cpu; + + lock_cpu_hotplug(); + cpu = alloc_cpu_id(); + + if (cpu < 0) + goto exit; + + info.complete = &done; + info.apicid = apicid; + info.cpu = cpu; + INIT_WORK(&task, do_warm_boot_cpu, &info); + + tsc_sync_disabled = 1; + + /* init low mem mapping */ + memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS); + flush_tlb_all(); + schedule_work(&task); + wait_for_completion(&done); + + tsc_sync_disabled = 0; + zap_low_mappings(); +exit: + unlock_cpu_hotplug(); + return cpu; +} #endif static void smp_tune_scheduling (void) { @@ -1064,7 +1123,7 @@ static void __init smp_boot_cpus(unsigne if (max_cpus <= cpucount+1) continue; - if (do_boot_cpu(apicid)) + if (((cpu = alloc_cpu_id()) > 0) && do_boot_cpu(apicid, cpu)) printk("CPU #%d not responding - cannot use it.\n", apicid); else @@ -1253,10 +1312,12 @@ void __init smp_cpus_done(unsigned int m setup_ioapic_dest(); #endif zap_low_mappings(); +#ifndef CONFIG_STR_SMP /* * Disable executability of the SMP trampoline: */ set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); +#endif } void __init smp_intr_init(void) diff 
-puN kernel/power/main.c~warmboot_cpu kernel/power/main.c --- linux-2.6.11/kernel/power/main.c~warmboot_cpu 2005-04-04 09:13:48.601254896 +0800 +++ linux-2.6.11-root/kernel/power/main.c 2005-04-04 09:13:48.607253984 +0800 @@ -15,6 +15,7 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/pm.h> +#include <linux/cpu.h> #include "power.h" @@ -137,6 +138,24 @@ static char * pm_states[] = { static int enter_state(suspend_state_t state) { int error; +#ifdef CONFIG_STR_SMP +
[RFC 6/6] Physical CPU hotadd and S3 SMP support
Boot a CPU at runtime and use it to support S3 SMP. Thanks, Shaohua --- linux-2.6.11-root/arch/i386/kernel/smpboot.c | 79 +++ linux-2.6.11-root/include/asm-i386/smp.h |4 + linux-2.6.11-root/kernel/power/main.c| 30 ++ 3 files changed, 104 insertions(+), 9 deletions(-) diff -puN arch/i386/kernel/smpboot.c~warmboot_cpu arch/i386/kernel/smpboot.c --- linux-2.6.11/arch/i386/kernel/smpboot.c~warmboot_cpu2005-04-04 09:13:48.600255048 +0800 +++ linux-2.6.11-root/arch/i386/kernel/smpboot.c2005-04-04 09:13:48.607253984 +0800 @@ -76,6 +76,12 @@ cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; static cpumask_t smp_commenced_mask; +/* This is ugly, but TSC's upper 32 bits can't be written in earlier CPU + * (before prescott), there is no way to resync one AP against BP + * TBD: for prescott and above, we should use IA64's algorithm + */ +static int __devinit tsc_sync_disabled; + /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; @@ -412,7 +418,7 @@ static void __devinit smp_callin(void) /* * Synchronize the TSC with the BP */ - if (cpu_has_tsc && cpu_khz) + if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) synchronize_tsc_ap(); } @@ -781,8 +787,19 @@ wakeup_secondary_cpu(int phys_apicid, un #endif /* WAKE_SECONDARY_VIA_INIT */ extern cpumask_t cpu_initialized; +static inline int alloc_cpu_id(void) +{ + cpumask_t tmp_map; + int cpu; -static int __devinit do_boot_cpu(int apicid) + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + if (cpu >= NR_CPUS) + return -ENODEV; + return cpu; +} + +static int __devinit do_boot_cpu(int apicid, int cpu) /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 
@@ -791,15 +808,10 @@ static int __devinit do_boot_cpu(int api { struct task_struct *idle; unsigned long boot_error; - int timeout, cpu; + int timeout; unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; - cpumask_t tmp_map; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - if (cpu >= NR_CPUS) - return -ENODEV; ++cpucount; /* * We can't use kernel_thread since we must avoid to @@ -920,6 +932,53 @@ void cpu_exit_clear(int cpu) do_exit_idle(); } + +struct warm_boot_cpu_info { + struct completion *complete; + int apicid; + int cpu; +}; + +static void __devinit do_warm_boot_cpu(void *p) +{ + struct warm_boot_cpu_info *info = p; + do_boot_cpu(info->apicid, info->cpu); + complete(info->complete); +} + +int __devinit smp_prepare_cpu(int apicid) +{ + DECLARE_COMPLETION(done); + struct warm_boot_cpu_info info; + struct work_struct task; + int cpu; + + lock_cpu_hotplug(); + cpu = alloc_cpu_id(); + + if (cpu < 0) + goto exit; + + info.complete = &done; + info.apicid = apicid; + info.cpu = cpu; + INIT_WORK(&task, do_warm_boot_cpu, &info); + + tsc_sync_disabled = 1; + + /* init low mem mapping */ + memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS); + flush_tlb_all(); + schedule_work(&task); + wait_for_completion(&done); + + tsc_sync_disabled = 0; + zap_low_mappings(); +exit: + unlock_cpu_hotplug(); + return cpu; +} #endif static void smp_tune_scheduling (void) { @@ -1064,7 +1123,7 @@ static void __init smp_boot_cpus(unsigne if (max_cpus <= cpucount+1) continue; - if (do_boot_cpu(apicid)) + if (((cpu = alloc_cpu_id()) > 0) && do_boot_cpu(apicid, cpu)) printk("CPU #%d not responding - cannot use it.\n", apicid); else @@ -1253,10 +1312,12 @@ void __init smp_cpus_done(unsigned int m setup_ioapic_dest(); #endif zap_low_mappings(); +#ifndef CONFIG_STR_SMP /* * Disable executability of the SMP trampoline: */ set_kernel_exec((unsigned long)trampoline_base, trampoline_exec); +#endif } void __init 
smp_intr_init(void) diff -puN kernel/power/main.c~warmboot_cpu kernel/power/main.c --- linux-2.6.11/kernel/power/main.c~warmboot_cpu 2005-04-04 09:13:48.601254896 +0800 +++ linux-2.6.11-root/kernel/power/main.c 2005-04-04 09:13:48.607253984 +0800 @@ -15,6 +15,7 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/pm.h> +#include <linux/cpu.h> #include "power.h" @@ -137,6 +138,24 @@ static char * pm_states[] = { static int enter_state(suspend_state_t state)