# HG changeset patch
# User Jimi Xenidis <[EMAIL PROTECTED]>
# Node ID a384dbf50d5934ba93eea17eccb7e43cf408dd87
# Parent  bbf2db4ddf5400e908ee6bf92ac798e5cfed82a0
[XEN][POWERPC] Turn on SMP.. Finally.

The following patch uses Xen specific methods to spin up secondary
processors and add them to the Linux devtree (not the flat-devtree).
Specifically:

 - Adds HYPERVISOR_vcpu_op() for probing and spinning.
 - "Hot-Plug" new CPU entries into the devtree
 - Start CPUs in the same place that OF/prom_init.c would have
 - Wire up SMP IPI to Xen event channels
 - 6 line common code change in LinuxPPC to set the # possible CPUs correctly

Tested on JS21 (4-way) and Maple(2-way) creating 1-1 Dom0 and several
VIO/DomUs up to 32-way.

NOTE: we cannot yet:
 - _add_ a CPU after the normal boot spinup process
 - remove a CPU

Signed-off-by: Jimi Xenidis <[EMAIL PROTECTED]>
---
 arch/powerpc/kernel/setup-common.c       |    6 
 arch/powerpc/platforms/xen/Makefile      |    1 
 arch/powerpc/platforms/xen/hcall.c       |   30 ++
 arch/powerpc/platforms/xen/setup.c       |   36 --
 arch/powerpc/platforms/xen/setup.h       |    1 
 arch/powerpc/platforms/xen/smp.c         |  424 +++++++++++++++++++++++++++++++
 include/asm-powerpc/xen/asm/hypercall.h  |    1 
 include/asm-powerpc/xen/asm/hypervisor.h |    2 
 8 files changed, 468 insertions(+), 33 deletions(-)

diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/kernel/setup-common.c
--- a/arch/powerpc/kernel/setup-common.c        Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/kernel/setup-common.c        Sun Jan 21 08:34:45 2007 -0500
@@ -388,6 +388,12 @@ void __init smp_setup_cpu_maps(void)
                }
        }
 
+       if (machine_is(xen)) {
+               /* something more intelligent perhaps? */
+               for (cpu = 0; cpu < NR_CPUS; cpu++)
+                       cpu_set(cpu, cpu_possible_map);
+       }
+
 #ifdef CONFIG_PPC64
        /*
         * On pSeries LPAR, we need to know how many cpus
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/Makefile
--- a/arch/powerpc/platforms/xen/Makefile       Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/Makefile       Sun Jan 21 08:34:45 2007 -0500
@@ -3,6 +3,7 @@ obj-y   += hcall.o
 obj-y  += hcall.o
 obj-y  += reboot.o
 obj-y  += setup.o
+obj-y  += smp.o
 obj-y  += time.o
 obj-y  += udbg_xen.o
 obj-y  += xen_guest.o
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/hcall.c
--- a/arch/powerpc/platforms/xen/hcall.c        Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/hcall.c        Sun Jan 21 08:34:45 2007 -0500
@@ -33,7 +33,7 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/event_channel.h>
 #include <xen/interface/physdev.h>
-#include <xen/interface/grant_table.h>
+#include <xen/interface/vcpu.h>
 #include <xen/public/privcmd.h>
 #include <asm/hypercall.h>
 #include <asm/page.h>
@@ -599,3 +599,31 @@ int arch_privcmd_hypercall(privcmd_hyper
        }
 }
 
+/*
+ * Issue a Xen vcpu_op hypercall for @vcpuid.
+ *
+ * VCPUOP_up, VCPUOP_down and VCPUOP_is_up carry no argument buffer, so
+ * @extra_args is ignored for them.  VCPUOP_initialise and
+ * VCPUOP_get_runstate_info hand @extra_args to xencomm_create_inline()
+ * and pass the resulting descriptor to the hypervisor.
+ *
+ * Returns whatever plpar_hcall_norets() returns, or -ENOSYS for a command
+ * this wrapper does not know about.
+ */
+int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args)
+{
+       const unsigned long hcall = __HYPERVISOR_vcpu_op;
+       void *desc;
+
+       switch (cmd) {
+       case VCPUOP_up:
+       case VCPUOP_down:
+       case VCPUOP_is_up:
+               /* no argument buffer for these */
+               return plpar_hcall_norets(XEN_MARK(hcall), cmd, vcpuid, 0);
+
+       case VCPUOP_initialise:         /* arg: vcpu_guest_context_t */
+       case VCPUOP_get_runstate_info:  /* arg: vcpu_runstate_info_t */
+               break;
+
+       default:
+               printk(KERN_ERR "%s: unknown vcpu cmd %d\n", __func__, cmd);
+               return -ENOSYS;
+       }
+
+       desc = xencomm_create_inline(extra_args);
+       return plpar_hcall_norets(XEN_MARK(hcall), cmd, vcpuid, desc);
+}
+       
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/setup.c
--- a/arch/powerpc/platforms/xen/setup.c        Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/setup.c        Sun Jan 21 08:34:45 2007 -0500
@@ -168,42 +168,10 @@ static void xen_power_save(void)
        HYPERVISOR_sched_op(SCHEDOP_block, NULL);
 }
 
-#ifdef CONFIG_SMP
-
-int __init smp_xen_probe(void)
-{
-       return 1;
-}
-
-void smp_xen_message_pass(int target, int msg)
-{
-       printk("%s(%d, %d)\n", __func__, target, msg);
-}
-
-void __devinit smp_xen_setup_cpu(int cpu)
-{
-       printk("%s(%d)\n", __func__, cpu);
-}
-
-struct smp_ops_t xen_smp_ops = {
-       .probe          = smp_xen_probe,
-       .message_pass   = smp_xen_message_pass,
-       .kick_cpu       = smp_generic_kick_cpu,
-       .setup_cpu      = smp_xen_setup_cpu,
-       .give_timebase  = smp_generic_give_timebase,
-       .take_timebase  = smp_generic_take_timebase,
-};
-#endif /* CONFIG_SMP */
-
 void __init xen_setup_arch(void)
 {
        /* init to some ~sane value until calibrate_delay() runs */
        loops_per_jiffy = 50000000;
-
-       /* Setup SMP callback */
-#ifdef CONFIG_SMP
-       smp_ops = &xen_smp_ops;
-#endif
 
        /* Lookup PCI hosts */
        if (is_initial_xendomain())
@@ -211,6 +179,10 @@ void __init xen_setup_arch(void)
 
 #ifdef CONFIG_DUMMY_CONSOLE
        conswitchp = &dummy_con;
+#endif
+#ifdef CONFIG_SMP
+       /* let them fly */
+       xen_setup_smp();
 #endif
 
        printk(KERN_INFO "Using Xen idle loop\n");
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/setup.h
--- a/arch/powerpc/platforms/xen/setup.h        Tue Dec 19 09:22:37 2006 -0500
+++ b/arch/powerpc/platforms/xen/setup.h        Sun Jan 21 08:34:45 2007 -0500
@@ -27,3 +27,4 @@ extern void free_foreign_page(struct pag
 extern void free_foreign_page(struct page *page);
 
 extern void __init xen_setup_time(struct machdep_calls *host_md);
+extern void xen_setup_smp(void);
diff -r bbf2db4ddf54 -r a384dbf50d59 include/asm-powerpc/xen/asm/hypercall.h
--- a/include/asm-powerpc/xen/asm/hypercall.h   Tue Dec 19 09:22:37 2006 -0500
+++ b/include/asm-powerpc/xen/asm/hypercall.h   Sun Jan 21 08:34:45 2007 -0500
@@ -44,6 +44,7 @@ extern int HYPERVISOR_physdev_op(int cmd
 extern int HYPERVISOR_physdev_op(int cmd, void *op);
 extern int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop,
                unsigned int count);
+extern int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);
 extern int HYPERVISOR_memory_op(unsigned int cmd, void *arg);
 extern int HYPERVISOR_multicall(void *call_list, int nr_calls);
 
diff -r bbf2db4ddf54 -r a384dbf50d59 include/asm-powerpc/xen/asm/hypervisor.h
--- a/include/asm-powerpc/xen/asm/hypervisor.h  Tue Dec 19 09:22:37 2006 -0500
+++ b/include/asm-powerpc/xen/asm/hypervisor.h  Sun Jan 21 08:34:45 2007 -0500
@@ -146,6 +146,8 @@ int direct_remap_pfn_range(struct vm_are
 #define DYNIRQ_BASE            (PIRQ_BASE + NR_PIRQS)
 #define NR_DYNIRQS             256
 
+#define NR_IPIS 4              /* PPC_MSG_DEBUGGER_BREAK + 1 */
+
 #if NR_IRQS < (NR_PIRQS + NR_DYNIRQS)
 #error to many Xen IRQs
 #endif
diff -r bbf2db4ddf54 -r a384dbf50d59 arch/powerpc/platforms/xen/smp.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/powerpc/platforms/xen/smp.c  Sun Jan 21 08:34:45 2007 -0500
@@ -0,0 +1,424 @@
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+#include <linux/smp.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
+#include <xen/evtchn.h>
+#include <asm/prom.h>
+#include <asm/udbg.h>
+#include <asm/hypervisor.h>
+#include "setup.h"
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(fmt...) printk(KERN_EMERG fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+/* Allocate @size bytes: from the boot allocator until mem_init_done is
+ * set, from kmalloc() afterwards. */
+static inline void *xen_of_alloc(ulong size)
+{
+       if (!mem_init_done)
+               return alloc_bootmem(size);
+
+       return kmalloc(size, GFP_KERNEL);
+}
+/* Free @ptr with kfree().  BUGs if called before mem_init_done is set. */
+static inline void xen_of_free(void *ptr)
+{
+       /* if this happens with the boot allocator then we are screwed */
+       BUG_ON(!mem_init_done);
+       kfree(ptr);
+}
+
+/*
+ * Clone property @op into one allocation laid out as
+ *   [struct property][value bytes][name string]
+ * Returns the copy, or NULL if the allocation fails.
+ */
+static struct property *dup_prop(struct property *op)
+{
+       const ulong name_sz = strlen(op->name) + 1;
+       const ulong total = sizeof (struct property) + name_sz + op->length;
+       struct property *copy;
+       void *cursor;
+
+       /* a single allocation, so there is only one failure to handle */
+       copy = xen_of_alloc(total);
+       if (copy == NULL)
+               return NULL;
+       memset(copy, 0, total);
+
+       /* the value goes directly behind the node to keep it aligned */
+       cursor = copy + 1;
+       copy->value = cursor;
+       copy->length = op->length;
+       memcpy(copy->value, op->value, copy->length);
+
+       /* the name string takes whatever is left */
+       cursor += op->length;
+       copy->name = cursor;
+       strcpy(copy->name, op->name);
+
+       return copy;
+}
+
+/*
+ * Copy every property of @src onto @dst.
+ *
+ * Returns 0 on success (also when @src has no properties at all) or
+ * -ENOMEM if a copy could not be allocated.  On failure every property
+ * found on @dst->properties is freed and the list is reset, so @dst must
+ * be a fresh node that carries nothing but copies made here.
+ */
+static int dup_properties(struct device_node *dst, struct device_node *src)
+{
+       struct property *op;
+       struct property *np;
+       struct property *next;
+
+       DBG("%s: duping to new cpu node: %s\n", __func__, dst->full_name);
+
+       for (op = src->properties; op != NULL; op = op->next) {
+               np = dup_prop(op);
+               if (!np)
+                       goto fail;
+
+               prom_add_property(dst, np);
+       }
+
+       return 0;
+
+fail:
+       DBG("%s: FAILED duping: %s\n", __func__, dst->full_name);
+       /* We could not allocate enough, so free what we have allocated.
+        * Assumes prom_add_property() linked each copy into
+        * dst->properties -- TODO confirm.  The link is read before the
+        * entry is freed so freed memory is never dereferenced. */
+       for (np = dst->properties; np != NULL; np = next) {
+               next = np->next;
+               xen_of_free(np);
+       }
+       dst->properties = NULL;
+
+       return -ENOMEM;
+}
+
+/*
+ * Create a device tree node for vcpu @cpu as a copy of @boot_cpu: same
+ * name, type and properties, with the "reg" property and the unit number
+ * in full_name replaced by @cpu.  The node is attached under the parent
+ * of @boot_cpu.
+ *
+ * Returns the added device node so it can be added to procfs in the case
+ * of hotplugging, or NULL if memory could not be allocated.  Panics if
+ * @boot_cpu has no "name" property or the copy has no "reg" property.
+ */
+static struct device_node *xen_add_vcpu_node(struct device_node *boot_cpu,
+                                            uint cpu)
+{
+       struct device_node *new_cpu;
+       struct property *pp;
+       void *p;
+       int sz;
+       int type_sz;
+       int name_sz;
+       int full_name_sz;
+
+       DBG("%s: boot cpu: %s\n", __func__, boot_cpu->full_name);
+
+       /* allocate in one shot in case we fail */
+       name_sz = strlen(boot_cpu->name) + 1;
+       type_sz = strlen(boot_cpu->type) + 1;
+       /* the boot cpu's path plus slack for a longer unit number */
+       full_name_sz = strlen(boot_cpu->full_name) + 3;
+
+       sz = sizeof (*new_cpu); /* the node */
+       sz += full_name_sz; /* full_name */
+       sz += name_sz; /* name */
+       sz += type_sz; /* type */
+
+       p = xen_of_alloc(sz);
+       if (!p)
+               return NULL;
+       memset(p, 0, sz);
+
+       /* the node */
+       new_cpu = p;
+       p += sizeof (*new_cpu);
+
+       /* name */
+       new_cpu->name = p;
+       strcpy(new_cpu->name, boot_cpu->name);
+       p += name_sz;
+
+       /* type */
+       new_cpu->type = p;
+       strcpy(new_cpu->type, boot_cpu->type);
+       p += type_sz;
+
+       /* full_name */
+       new_cpu->full_name = p;
+
+       /* assemble new full_name */
+       pp = of_find_property(boot_cpu, "name", NULL);
+       if (!pp)
+               panic("%s: no name prop\n", __func__);
+
+       DBG("%s: name is: %s = %s\n", __func__, pp->name, pp->value);
+       /* "<name>@<cpu>", bounded by the space reserved for it above.
+        * NOTE(review): format reconstructed as "%s@%u"; verify against
+        * the original changeset. */
+       snprintf(new_cpu->full_name, full_name_sz, "/cpus/%s@%u",
+                pp->value, cpu);
+
+       if (dup_properties(new_cpu, boot_cpu)) {
+               xen_of_free(new_cpu);
+               return NULL;
+       }
+
+       /* fixup reg property */
+       DBG("%s: updating reg: %d\n", __func__, cpu);
+       pp = of_find_property(new_cpu, "reg", NULL);
+       if (!pp)
+               panic("%s: no reg prop\n", __func__);
+       *(int *)pp->value = cpu;
+
+       /* only nodes allocated after mem_init_done are marked dynamic */
+       if (mem_init_done)
+               OF_MARK_DYNAMIC(new_cpu);
+
+       kref_init(&new_cpu->kref);
+
+       /* insert the node */
+       new_cpu->parent = of_get_parent(boot_cpu);
+       of_attach_node(new_cpu);
+       of_node_put(new_cpu->parent);
+
+       return new_cpu;
+}
+
+/*
+ * Build the initial register state for @vcpu and register it with Xen
+ * through VCPUOP_initialise.  The vcpu will begin at @entry with its own
+ * number in r3.  Panics if the hypercall fails.
+ */
+static void cpu_initialize_context(unsigned int vcpu, ulong entry)
+{
+       vcpu_guest_context_t ctxt;
+       int rc;
+
+       /* fill the user registers with a pattern, then set the ones we
+        * care about */
+       memset(&ctxt.user_regs, 0x55, sizeof(ctxt.user_regs));
+
+       ctxt.user_regs.pc = entry;
+       ctxt.user_regs.msr = 0;
+       ctxt.user_regs.gprs[3] = vcpu;
+       ctxt.user_regs.gprs[1] = 0; /* Linux uses its own stack */
+
+       /* XXX verify this *** */
+       /* There is a buggy kernel that does not zero the "local_paca", so
+        * we must make sure this register is 0 */
+       ctxt.user_regs.gprs[13] = 0;
+
+       DBG("%s: initializing vcpu: %d\n", __func__, vcpu);
+
+       rc = HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu, &ctxt);
+       if (rc != 0)
+               panic("%s: VCPUOP_initialise failed, vcpu: %d\n",
+                      __func__, vcpu);
+}
+
+/*
+ * Initialise the context of @vcpu so that it starts at @entry, then ask
+ * Xen to bring it up.  Returns the result of the VCPUOP_up hypercall.
+ */
+static int xen_start_vcpu(uint vcpu, ulong entry)
+{
+       DBG("%s: starting vcpu: %d\n", __func__, vcpu);
+
+       cpu_initialize_context(vcpu, entry);
+
+       DBG("%s: Spinning up vcpu: %d\n", __func__, vcpu);
+       return HYPERVISOR_vcpu_op(VCPUOP_up, vcpu, NULL);
+}
+
+extern void __secondary_hold(void);
+extern unsigned long __secondary_hold_spinloop;
+extern unsigned long __secondary_hold_acknowledge;
+
+/*
+ * Probe vcpu numbers 1 .. NR_CPUS-1 and start every one for which
+ * VCPUOP_is_up does not return an error.  Each vcpu found is started at
+ * __secondary_hold, given a bounded spin to change
+ * __secondary_hold_acknowledge, gets a device tree node cloned from the
+ * boot cpu node, and is marked present.
+ *
+ * Panics if no "cpu" node exists or if starting a vcpu fails.
+ */
+static void xen_boot_secondary_vcpus(void)
+{
+       int vcpu;
+       int rc;
+       const unsigned long mark = (unsigned long)-1;
+       unsigned long *spinloop = &__secondary_hold_spinloop;
+       unsigned long *acknowledge = &__secondary_hold_acknowledge;
+#ifdef CONFIG_PPC64
+       /* __secondary_hold is actually a descriptor, not the text address */
+       unsigned long secondary_hold = __pa(*(unsigned long *)__secondary_hold);
+#else
+       unsigned long secondary_hold = __pa(__secondary_hold);
+#endif
+       struct device_node *boot_cpu;
+
+       DBG("%s: finding CPU node\n", __func__);
+       boot_cpu = of_find_node_by_type(NULL, "cpu");
+       if (!boot_cpu)
+               panic("%s: Cannot find Booting CPU node\n", __func__);
+
+       /* Set the common spinloop variable, so all of the secondary cpus
+        * will block when they are awakened from their OF spinloop.
+        * This must occur for both SMP and non SMP kernels, since OF will
+        * be trashed when we move the kernel.
+        */
+       *spinloop = 0;
+
+       DBG("%s: Searching for all vcpu numbers > 0\n", __func__);
+       /* try and start as many as we can */
+       for (vcpu = 1; vcpu < NR_CPUS; vcpu++) {
+               int i;
+
+               /* a negative result is taken to mean "no such vcpu" */
+               rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, vcpu, NULL);
+               if (rc < 0)
+                       continue;
+
+               DBG("%s: Found vcpu: %d\n", __func__, vcpu);
+               /* Init the acknowledge var which will be reset by
+                * the secondary cpu when it awakens from its OF
+                * spinloop.
+                */
+               *acknowledge = mark;
+
+               DBG("%s: Starting vcpu: %d at pc: 0x%lx\n", __func__,
+                   vcpu, secondary_hold);
+               rc = xen_start_vcpu(vcpu, secondary_hold);
+               if (rc)
+                       panic("%s: xen_start_vpcu() failed\n", __func__);
+
+
+               DBG("%s: Waiting for ACK on vcpu: %d\n", __func__, vcpu);
+               /* bounded spin until the mark is overwritten */
+               for (i = 0; (i < 100000000) && (*acknowledge == mark); i++)
+                       mb();
+
+               /* NOTE(review): a missing or wrong ACK is only visible in
+                * debug output; the vcpu is still added and marked present */
+               if (*acknowledge == vcpu)
+                       DBG("%s: Recieved for ACK on vcpu: %d\n",
+                           __func__, vcpu);
+
+               /* NOTE(review): the NULL (allocation failure) return of
+                * xen_add_vcpu_node() is not checked here */
+               xen_add_vcpu_node(boot_cpu, vcpu);
+
+               cpu_set(vcpu, cpu_present_map);
+               set_hard_smp_processor_id(vcpu, vcpu);
+       }
+       of_node_put(boot_cpu);
+       DBG("%s: end...\n", __func__);
+}
+
+/* smp_ops probe hook: returns the number of cpus set in cpu_present_map. */
+static int __init smp_xen_probe(void)
+{
+       return cpus_weight(cpu_present_map);
+}
+
+/* IPI handler: forwards PPC_MSG_RESCHEDULE to smp_message_recv(). */
+static irqreturn_t xen_ppc_msg_reschedule(int irq, void *dev_id,
+                                         struct pt_regs *regs)
+{
+       smp_message_recv(PPC_MSG_RESCHEDULE, regs);
+       return IRQ_HANDLED;
+}
+
+/* IPI handler: forwards PPC_MSG_CALL_FUNCTION to smp_message_recv(). */
+static irqreturn_t xen_ppc_msg_call_function(int irq, void *dev_id,
+                                            struct pt_regs *regs)
+{
+       smp_message_recv(PPC_MSG_CALL_FUNCTION, regs);
+       return IRQ_HANDLED;
+}
+
+/* IPI handler: forwards PPC_MSG_DEBUGGER_BREAK to smp_message_recv(). */
+static irqreturn_t xen_ppc_msg_debugger_break(int irq, void *dev_id,
+                                         struct pt_regs *regs)
+{
+       smp_message_recv(PPC_MSG_DEBUGGER_BREAK, regs);
+       return IRQ_HANDLED;
+}
+
+/* Describes one IPI message: its handler, its PPC_MSG_* number and the
+ * name passed when the handler is bound. */
+struct message {
+       irqreturn_t (*f)(int, void *, struct pt_regs *);
+       int num;
+       char *name;
+};
+/* The IPI messages that smp_xen_setup_cpu() binds for every cpu. */
+static struct message ipi_msgs[] = {
+       {
+               .num = PPC_MSG_RESCHEDULE,
+               .f = xen_ppc_msg_reschedule,
+               .name = "IPI-resched"
+       },
+       {
+               .num = PPC_MSG_CALL_FUNCTION,
+               .f = xen_ppc_msg_call_function,
+               .name = "IPI-function"
+               },
+       {
+               .num = PPC_MSG_DEBUGGER_BREAK,
+               .f = xen_ppc_msg_debugger_break,
+               .name = "IPI-debug"
+       }
+};
+
+DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
+
+/*
+ * smp_ops setup_cpu hook: bind an IPI handler on @cpu for every entry of
+ * ipi_msgs[] and remember the irq in that cpu's ipi_to_irq[] slot.
+ * BUGs if the table does not fit ipi_to_irq[] or a bind fails.
+ */
+static void __devinit smp_xen_setup_cpu(int cpu)
+{
+       const int nr_ipis = ARRAY_SIZE(__get_cpu_var(ipi_to_irq));
+       struct message *msg;
+       int irq;
+       int i;
+
+       /* the include web is big and scary and could have changed these
+        * values under us, so check that they are sane */
+       BUG_ON(ARRAY_SIZE(ipi_msgs) > nr_ipis);
+
+       for (i = 0; i < ARRAY_SIZE(ipi_msgs); i++) {
+               msg = &ipi_msgs[i];
+               BUG_ON(msg->num >= nr_ipis);
+
+               irq = bind_ipi_to_irqhandler(msg->num, cpu, msg->f,
+                                            SA_INTERRUPT, msg->name, NULL);
+               BUG_ON(irq < 0);
+               per_cpu(ipi_to_irq, cpu)[msg->num] = irq;
+               DBG("%s: cpu: %d vector :%d irq: %d\n",
+                      __func__, cpu, msg->num, irq);
+       }
+}
+
+/*
+ * Send IPI @vector to @cpu by notifying the irq recorded in that cpu's
+ * ipi_to_irq[] slot.  BUGs if the recorded irq is negative.  @vector is
+ * used as an index without a range check here.
+ */
+static inline void send_IPI_one(unsigned int cpu, int vector)
+{
+       int irq;
+
+       irq = per_cpu(ipi_to_irq, cpu)[vector];
+       BUG_ON(irq < 0);
+
+       DBG("%s: cpu: %d vector :%d irq: %d!\n",
+              __func__, cpu, vector, irq);
+       DBG("%s: per_cpu[%p]: %d %d %d %d\n",
+              __func__, per_cpu(ipi_to_irq, cpu),
+              per_cpu(ipi_to_irq, cpu)[0],
+              per_cpu(ipi_to_irq, cpu)[1],
+              per_cpu(ipi_to_irq, cpu)[2],
+              per_cpu(ipi_to_irq, cpu)[3]);
+
+       notify_remote_via_irq(irq);
+}
+
+/*
+ * smp_ops message_pass hook: deliver IPI @msg to @target, which is either
+ * a cpu number, MSG_ALL (every online cpu) or MSG_ALL_BUT_SELF (every
+ * online cpu except the caller's).  Panics on an unknown @msg.
+ */
+static void smp_xen_message_pass(int target, int msg)
+{
+       int cpu;
+
+       if (msg != PPC_MSG_RESCHEDULE &&
+           msg != PPC_MSG_CALL_FUNCTION &&
+           msg != PPC_MSG_DEBUGGER_BREAK) {
+               panic("SMP %d: smp_message_pass: unknown msg %d\n",
+                      smp_processor_id(), msg);
+               return;
+       }
+
+       /* a plain cpu number: exactly one recipient */
+       if (target != MSG_ALL && target != MSG_ALL_BUT_SELF) {
+               send_IPI_one(target, msg);
+               return;
+       }
+
+       /* broadcast, optionally leaving out the sender */
+       for_each_online_cpu(cpu) {
+               if (target == MSG_ALL_BUT_SELF && cpu == smp_processor_id())
+                       continue;
+               send_IPI_one(cpu, msg);
+       }
+}
+
+/* SMP callbacks installed by xen_setup_smp(); kick_cpu uses the generic
+ * implementation, the other hooks are the Xen versions in this file. */
+static struct smp_ops_t xen_smp_ops = {
+       .probe          = smp_xen_probe,
+       .message_pass   = smp_xen_message_pass,
+       .kick_cpu       = smp_generic_kick_cpu,
+       .setup_cpu      = smp_xen_setup_cpu,
+};
+
+/*
+ * Install the Xen smp_ops, start the secondary vcpus and then call
+ * smp_release_cpus().
+ */
+void xen_setup_smp(void)
+{
+       smp_ops = &xen_smp_ops;
+
+       xen_boot_secondary_vcpus();
+       smp_release_cpus();
+}

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@lists.xensource.com
http://lists.xensource.com/xen-ppc-devel

Reply via email to