[RFC 6/6] Physical CPU hotadd and S3 SMP support

2005-04-03 Thread Li Shaohua
Boot a CPU at runtime and use it to support S3 SMP.

Thanks,
Shaohua

---

 linux-2.6.11-root/arch/i386/kernel/smpboot.c |   79 +++
 linux-2.6.11-root/include/asm-i386/smp.h |4 +
 linux-2.6.11-root/kernel/power/main.c|   30 ++
 3 files changed, 104 insertions(+), 9 deletions(-)

diff -puN arch/i386/kernel/smpboot.c~warmboot_cpu arch/i386/kernel/smpboot.c
--- linux-2.6.11/arch/i386/kernel/smpboot.c~warmboot_cpu   2005-04-04 09:13:48.600255048 +0800
+++ linux-2.6.11-root/arch/i386/kernel/smpboot.c   2005-04-04 09:13:48.607253984 +0800
@@ -76,6 +76,12 @@ cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
 static cpumask_t smp_commenced_mask;
 
+/* This is ugly, but TSC's upper 32 bits can't be written in earlier CPUs
+ * (before prescott), there is no way to resync one AP against BP
+ * TBD: for prescott and above, we should use IA64's algorithm
+ */
+static int __devinit tsc_sync_disabled;
+
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 
@@ -412,7 +418,7 @@ static void __devinit smp_callin(void)
/*
 *  Synchronize the TSC with the BP
 */
-   if (cpu_has_tsc && cpu_khz)
+   if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
synchronize_tsc_ap();
 }
 
@@ -781,8 +787,19 @@ wakeup_secondary_cpu(int phys_apicid, un
 #endif /* WAKE_SECONDARY_VIA_INIT */
 
 extern cpumask_t cpu_initialized;
+static inline int alloc_cpu_id(void)
+{
+   cpumask_t   tmp_map;
+   int cpu;
 
-static int __devinit do_boot_cpu(int apicid)
+   cpus_complement(tmp_map, cpu_present_map);
+   cpu = first_cpu(tmp_map);
+   if (cpu >= NR_CPUS)
+   return -ENODEV;
+   return cpu;
+}
+
+static int __devinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -791,15 +808,10 @@ static int __devinit do_boot_cpu(int api
 {
struct task_struct *idle;
unsigned long boot_error;
-   int timeout, cpu;
+   int timeout;
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
-   cpumask_t   tmp_map;
 
-   cpus_complement(tmp_map, cpu_present_map);
-   cpu = first_cpu(tmp_map);
-   if (cpu >= NR_CPUS)
-   return -ENODEV;
++cpucount;
/*
 * We can't use kernel_thread since we must avoid to
@@ -920,6 +932,53 @@ void cpu_exit_clear(int cpu)
 
do_exit_idle();
 }
+
+struct warm_boot_cpu_info {
+   struct completion *complete;
+   int apicid;
+   int cpu;
+};
+
+static void __devinit do_warm_boot_cpu(void *p)
+{
+   struct warm_boot_cpu_info *info = p;
+   do_boot_cpu(info->apicid, info->cpu);
+   complete(info->complete);
+}
+
+int __devinit smp_prepare_cpu(int apicid)
+{
+   DECLARE_COMPLETION(done);
+   struct warm_boot_cpu_info info;
+   struct work_struct task;
+   int cpu;
+
+   lock_cpu_hotplug();
+   cpu = alloc_cpu_id();
+
+   if (cpu < 0)
+   goto exit;
+
+   info.complete = &done;
+   info.apicid = apicid;
+   info.cpu = cpu;
+   INIT_WORK(&task, do_warm_boot_cpu, &info);
+
+   tsc_sync_disabled = 1;
+
+   /* init low mem mapping */
+   memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+   sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
+   flush_tlb_all();
+   schedule_work(&task);
+   wait_for_completion(&done);
+
+   tsc_sync_disabled = 0;
+   zap_low_mappings();
+exit:
+   unlock_cpu_hotplug();
+   return cpu;
+}
 #endif
 static void smp_tune_scheduling (void)
 {
@@ -1064,7 +1123,7 @@ static void __init smp_boot_cpus(unsigne
if (max_cpus <= cpucount+1)
continue;
 
-   if (do_boot_cpu(apicid))
+   if (((cpu = alloc_cpu_id()) > 0) && do_boot_cpu(apicid, cpu))
printk("CPU #%d not responding - cannot use it.\n",
apicid);
else
@@ -1253,10 +1312,12 @@ void __init smp_cpus_done(unsigned int m
setup_ioapic_dest();
 #endif
zap_low_mappings();
+#ifndef CONFIG_STR_SMP
/*
 * Disable executability of the SMP trampoline:
 */
set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
 }
 
 void __init smp_intr_init(void)
diff -puN kernel/power/main.c~warmboot_cpu kernel/power/main.c
--- linux-2.6.11/kernel/power/main.c~warmboot_cpu   2005-04-04 09:13:48.601254896 +0800
+++ linux-2.6.11-root/kernel/power/main.c   2005-04-04 09:13:48.607253984 +0800
@@ -15,6 +15,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/pm.h>
+#include <linux/cpu.h>
 
 
 #include "power.h"
@@ -137,6 +138,24 @@ static char * pm_states[] = {
 static int enter_state(suspend_state_t state)
 {
int error;
+#ifdef CONFIG_STR_SMP
+  

[RFC 6/6] Physical CPU hotadd and S3 SMP support

2005-04-03 Thread Li Shaohua
Boot a CPU at runtime and use it to support S3 SMP.

Thanks,
Shaohua

---

 linux-2.6.11-root/arch/i386/kernel/smpboot.c |   79 +++
 linux-2.6.11-root/include/asm-i386/smp.h |4 +
 linux-2.6.11-root/kernel/power/main.c|   30 ++
 3 files changed, 104 insertions(+), 9 deletions(-)

diff -puN arch/i386/kernel/smpboot.c~warmboot_cpu arch/i386/kernel/smpboot.c
--- linux-2.6.11/arch/i386/kernel/smpboot.c~warmboot_cpu   2005-04-04 09:13:48.600255048 +0800
+++ linux-2.6.11-root/arch/i386/kernel/smpboot.c   2005-04-04 09:13:48.607253984 +0800
@@ -76,6 +76,12 @@ cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
 static cpumask_t smp_commenced_mask;
 
+/* This is ugly, but TSC's upper 32 bits can't be written in earlier CPUs
+ * (before prescott), there is no way to resync one AP against BP
+ * TBD: for prescott and above, we should use IA64's algorithm
+ */
+static int __devinit tsc_sync_disabled;
+
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 
@@ -412,7 +418,7 @@ static void __devinit smp_callin(void)
/*
 *  Synchronize the TSC with the BP
 */
-   if (cpu_has_tsc && cpu_khz)
+   if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
synchronize_tsc_ap();
 }
 
@@ -781,8 +787,19 @@ wakeup_secondary_cpu(int phys_apicid, un
 #endif /* WAKE_SECONDARY_VIA_INIT */
 
 extern cpumask_t cpu_initialized;
+static inline int alloc_cpu_id(void)
+{
+   cpumask_t   tmp_map;
+   int cpu;
 
-static int __devinit do_boot_cpu(int apicid)
+   cpus_complement(tmp_map, cpu_present_map);
+   cpu = first_cpu(tmp_map);
+   if (cpu >= NR_CPUS)
+   return -ENODEV;
+   return cpu;
+}
+
+static int __devinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -791,15 +808,10 @@ static int __devinit do_boot_cpu(int api
 {
struct task_struct *idle;
unsigned long boot_error;
-   int timeout, cpu;
+   int timeout;
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
-   cpumask_t   tmp_map;
 
-   cpus_complement(tmp_map, cpu_present_map);
-   cpu = first_cpu(tmp_map);
-   if (cpu >= NR_CPUS)
-   return -ENODEV;
++cpucount;
/*
 * We can't use kernel_thread since we must avoid to
@@ -920,6 +932,53 @@ void cpu_exit_clear(int cpu)
 
do_exit_idle();
 }
+
+struct warm_boot_cpu_info {
+   struct completion *complete;
+   int apicid;
+   int cpu;
+};
+
+static void __devinit do_warm_boot_cpu(void *p)
+{
+   struct warm_boot_cpu_info *info = p;
+   do_boot_cpu(info->apicid, info->cpu);
+   complete(info->complete);
+}
+
+int __devinit smp_prepare_cpu(int apicid)
+{
+   DECLARE_COMPLETION(done);
+   struct warm_boot_cpu_info info;
+   struct work_struct task;
+   int cpu;
+
+   lock_cpu_hotplug();
+   cpu = alloc_cpu_id();
+
+   if (cpu < 0)
+   goto exit;
+
+   info.complete = &done;
+   info.apicid = apicid;
+   info.cpu = cpu;
+   INIT_WORK(&task, do_warm_boot_cpu, &info);
+
+   tsc_sync_disabled = 1;
+
+   /* init low mem mapping */
+   memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+   sizeof(swapper_pg_dir[0]) * KERNEL_PGD_PTRS);
+   flush_tlb_all();
+   schedule_work(&task);
+   wait_for_completion(&done);
+
+   tsc_sync_disabled = 0;
+   zap_low_mappings();
+exit:
+   unlock_cpu_hotplug();
+   return cpu;
+}
 #endif
 static void smp_tune_scheduling (void)
 {
@@ -1064,7 +1123,7 @@ static void __init smp_boot_cpus(unsigne
if (max_cpus <= cpucount+1)
continue;
 
-   if (do_boot_cpu(apicid))
+   if (((cpu = alloc_cpu_id()) > 0) && do_boot_cpu(apicid, cpu))
printk("CPU #%d not responding - cannot use it.\n",
apicid);
else
@@ -1253,10 +1312,12 @@ void __init smp_cpus_done(unsigned int m
setup_ioapic_dest();
 #endif
zap_low_mappings();
+#ifndef CONFIG_STR_SMP
/*
 * Disable executability of the SMP trampoline:
 */
set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
+#endif
 }
 
 void __init smp_intr_init(void)
diff -puN kernel/power/main.c~warmboot_cpu kernel/power/main.c
--- linux-2.6.11/kernel/power/main.c~warmboot_cpu   2005-04-04 09:13:48.601254896 +0800
+++ linux-2.6.11-root/kernel/power/main.c   2005-04-04 09:13:48.607253984 +0800
@@ -15,6 +15,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/pm.h>
+#include <linux/cpu.h>
 
 
 #include "power.h"
@@ -137,6 +138,24 @@ static char * pm_states[] = {
 static int enter_state(suspend_state_t state)