Re: [XenPPC] [xenppc-unstable] [XEN][POWERPC] SMP/IPI/MB combined
On Tue, Nov 28, 2006 at 10:47:31AM -0500, Jimi Xenidis wrote: > On Nov 28, 2006, at 1:37 AM, Amos Waterland wrote: > > The compiler is not told that read_clocks_cpumask is volatile, so it > > is free to turn that loop into an infinite loop, as my gcc 4.1.1 > > cross-compiler does. I am surprised that other Xen architectures > > have not seen the same problem. > > Found it, cpu_relax() is supposed to contain barrier() call. > My fault but it coulda been hollis :) Excellent, thanks. > you _always_ have to wait for call_data.started it means that its > safe to reuse call_data. Yes, I know :) I only did that as a crude approximation, the proper solution (pending your design comments), is not serializing do_external with a lock (see the next patch I sent). ___ Xen-ppc-devel mailing list Xen-ppc-devel@lists.xensource.com http://lists.xensource.com/xen-ppc-devel
Re: [XenPPC] [xenppc-unstable] [XEN][POWERPC] SMP/IPI/MB combined
On Nov 28, 2006, at 1:37 AM, Amos Waterland wrote: This will have to be reworked and broken up into individual parts for submission, but here is what is necessary to make 'C-a C-a C-a t' work properly. Jimi, when you disassemble xen-syms compiled without this patch, do you see a bogus infinite loop in read_clocks? looked briefly but did not notice it. The compiler is not told that read_clocks_cpumask is volatile, so it is free to turn that loop into an infinite loop, as my gcc 4.1.1 cross-compiler does. I am surprised that other Xen architectures have not seen the same problem. Found it, cpu_relax() is supposed to contain barrier() call. My fault but it coulda been hollis :) diff -r cc45282daf3d xen/include/asm-powerpc/processor.h --- a/xen/include/asm-powerpc/processor.h Mon Nov 27 17:17:07 2006 -0500 +++ b/xen/include/asm-powerpc/processor.h Tue Nov 28 10:19:09 2006 -0500 @@ -152,7 +152,7 @@ static inline void nop(void) { static inline void nop(void) { __asm__ __volatile__ ("nop"); } -#define cpu_relax() nop() +#define cpu_relax() barrier() static inline unsigned int mfpir(void) { this will solve the volatile issue and am pushing this now. more below.. arch/powerpc/smp.c| 18 ++ common/keyhandler.c |2 +- include/xen/cpumask.h |2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff -r 305751a5281e xen/arch/powerpc/smp.c --- a/xen/arch/powerpc/smp.cWed Nov 22 16:29:25 2006 -0500 +++ b/xen/arch/powerpc/smp.cTue Nov 28 00:45:24 2006 -0500 @@ -91,31 +91,33 @@ int on_selected_cpus( int wait) { int t, retval = 0, nr_cpus = cpus_weight(selected); +int stall = timebase_freq * 10; spin_lock(&call_lock); call_data.func = func; call_data.info = info; call_data.wait = wait; -call_data.wait = 1; /* Until we get RCU around call_data. */ atomic_set(&call_data.started, 0); atomic_set(&call_data.finished, 0); mb(); send_IPI_mask(selected, CALL_FUNCTION_VECTOR); -/* We always wait for an initiation ACK from remote CPU. */ -for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) { -if (t && t % timebase_freq == 0) { -printk("IPI start stall: %d ACKS to %d SYNS\n", - atomic_read(&call_data.started), nr_cpus); -} +/* If told to, we wait for an initiation ACK from remote CPU. */ +if (wait) { + for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) { + if (t > 0 && t % stall == 0) { + printk("IPI start stall: %d ACKS to %d SYNS\n", + atomic_read(&call_data.started), nr_cpus); + } + } you _always_ have to wait for call_data.started it means that its safe to reuse call_data. } /* If told to, we wait for a completion ACK from remote CPU. */ if (wait) { for (t = 0; atomic_read(&call_data.finished) != nr_cpus; t+ +) { -if (t > timebase_freq && t % timebase_freq == 0) { +if (t > 0 && t % stall == 0) { printk("IPI finish stall: %d ACKS to %d SYNS\n", atomic_read(&call_data.finished), nr_cpus); } diff -r 305751a5281e xen/common/keyhandler.c --- a/xen/common/keyhandler.c Wed Nov 22 16:29:25 2006 -0500 +++ b/xen/common/keyhandler.c Tue Nov 28 00:12:14 2006 -0500 @@ -193,7 +193,7 @@ static void dump_domains(unsigned char k read_unlock(&domlist_lock); } ___ Xen-ppc-devel mailing list Xen-ppc-devel@lists.xensource.com http://lists.xensource.com/xen-ppc-devel
Re: [XenPPC] [xenppc-unstable] [XEN][POWERPC] SMP/IPI/MB combined
This will have to be reworked and broken up into individual parts for submission, but here is what is necessary to make 'C-a C-a C-a t' work properly. Jimi, when you disassemble xen-syms compiled without this patch, do you see a bogus infinite loop in read_clocks? The compiler is not told that read_clocks_cpumask is volatile, so it is free to turn that loop into an infinite loop, as my gcc 4.1.1 cross-compiler does. I am surprised that other Xen architectures have not seen the same problem. arch/powerpc/smp.c| 18 ++ common/keyhandler.c |2 +- include/xen/cpumask.h |2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff -r 305751a5281e xen/arch/powerpc/smp.c --- a/xen/arch/powerpc/smp.cWed Nov 22 16:29:25 2006 -0500 +++ b/xen/arch/powerpc/smp.cTue Nov 28 00:45:24 2006 -0500 @@ -91,31 +91,33 @@ int on_selected_cpus( int wait) { int t, retval = 0, nr_cpus = cpus_weight(selected); +int stall = timebase_freq * 10; spin_lock(&call_lock); call_data.func = func; call_data.info = info; call_data.wait = wait; -call_data.wait = 1; /* Until we get RCU around call_data. */ atomic_set(&call_data.started, 0); atomic_set(&call_data.finished, 0); mb(); send_IPI_mask(selected, CALL_FUNCTION_VECTOR); -/* We always wait for an initiation ACK from remote CPU. */ -for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) { -if (t && t % timebase_freq == 0) { -printk("IPI start stall: %d ACKS to %d SYNS\n", - atomic_read(&call_data.started), nr_cpus); -} +/* If told to, we wait for an initiation ACK from remote CPU. */ +if (wait) { + for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) { + if (t > 0 && t % stall == 0) { + printk("IPI start stall: %d ACKS to %d SYNS\n", + atomic_read(&call_data.started), nr_cpus); + } + } } /* If told to, we wait for a completion ACK from remote CPU. */ if (wait) { for (t = 0; atomic_read(&call_data.finished) != nr_cpus; t++) { -if (t > timebase_freq && t % timebase_freq == 0) { +if (t > 0 && t % stall == 0) { printk("IPI finish stall: %d ACKS to %d SYNS\n", atomic_read(&call_data.finished), nr_cpus); } diff -r 305751a5281e xen/common/keyhandler.c --- a/xen/common/keyhandler.c Wed Nov 22 16:29:25 2006 -0500 +++ b/xen/common/keyhandler.c Tue Nov 28 00:12:14 2006 -0500 @@ -193,7 +193,7 @@ static void dump_domains(unsigned char k read_unlock(&domlist_lock); } -static cpumask_t read_clocks_cpumask = CPU_MASK_NONE; +static cpumask_t volatile read_clocks_cpumask = CPU_MASK_NONE; static s_time_t read_clocks_time[NR_CPUS]; static void read_clocks_slave(void *unused) diff -r 305751a5281e xen/include/xen/cpumask.h --- a/xen/include/xen/cpumask.h Wed Nov 22 16:29:25 2006 -0500 +++ b/xen/include/xen/cpumask.h Tue Nov 28 00:12:42 2006 -0500 @@ -177,7 +177,7 @@ static inline int __cpus_subset(const cp } #define cpus_empty(src) __cpus_empty(&(src), NR_CPUS) -static inline int __cpus_empty(const cpumask_t *srcp, int nbits) +static inline int __cpus_empty(const cpumask_t volatile *srcp, int nbits) { return bitmap_empty(srcp->bits, nbits); } ___ Xen-ppc-devel mailing list Xen-ppc-devel@lists.xensource.com http://lists.xensource.com/xen-ppc-devel
Re: [XenPPC] [xenppc-unstable] [XEN][POWERPC] SMP/IPI/MB combined
On Nov 22, 2006, at 3:01 PM, Xen patchbot-xenppc-unstable wrote: # HG changeset patch # User Jimi Xenidis <[EMAIL PROTECTED]> # Node ID ea41ccaa8d77134b8fb55e8b002d358e67c47152 # Parent ce8c1e26b2aebd64c8a2f02e20ed46d587f42870 [XEN][POWERPC] SMP/IPI/MB combined After pushing this patch I when to fix/update some ^A^A^A commands which I did. Amos, it seems that: (XEN) key 't' (ascii '74') => display multi-cpu clock info has problems. I filed a bug to cover this: http://bugzilla.xensource.com/bugzilla/show_bug.cgi?id=820 -JX ___ Xen-ppc-devel mailing list Xen-ppc-devel@lists.xensource.com http://lists.xensource.com/xen-ppc-devel
[XenPPC] [xenppc-unstable] [XEN][POWERPC] SMP/IPI/MB combined
# HG changeset patch # User Jimi Xenidis <[EMAIL PROTECTED]> # Node ID ea41ccaa8d77134b8fb55e8b002d358e67c47152 # Parent ce8c1e26b2aebd64c8a2f02e20ed46d587f42870 [XEN][POWERPC] SMP/IPI/MB combined This patch rolls up and rebases the following patches for submission against current tip of tree: * Memory barrier after SP store * IPI support * SMP support The only changes from the previous submission other than trivial fast-forward merges are to remove the ERAT flush, since it was comitted seperately, and to make the status messages about waiting for remote function completion ACKs only kick in after a full second has passed. Note that this path REQUIRES that some form of the dom0 Linux patch titled "Make Linux bail out of IPI vector reset" be applied. Signed-off-by: Amos Waterland <[EMAIL PROTECTED]> Signed-off-by: Jimi Xenidis <[EMAIL PROTECTED]> --- xen/arch/powerpc/external.c| 32 xen/arch/powerpc/mpic.c|9 - xen/arch/powerpc/mpic_init.c | 48 +++ xen/arch/powerpc/setup.c | 50 --- xen/arch/powerpc/smp.c | 135 - xen/include/asm-powerpc/mach-default/irq_vectors.h | 22 --- xen/include/asm-powerpc/smp.h | 16 ++ 7 files changed, 256 insertions(+), 56 deletions(-) diff -r ce8c1e26b2ae -r ea41ccaa8d77 xen/arch/powerpc/external.c --- a/xen/arch/powerpc/external.c Wed Nov 22 14:35:09 2006 -0500 +++ b/xen/arch/powerpc/external.c Wed Nov 22 14:53:13 2006 -0500 @@ -82,7 +82,16 @@ void do_external(struct cpu_user_regs *r vec = xen_mpic_get_irq(regs); -if (vec != -1) { +if (vector_is_ipi(vec)) { + /* do_IRQ is fundamentally broken for reliable IPI delivery. */ + irq_desc_t *desc = &irq_desc[vec]; + regs->entry_vector = vec; + spin_lock(&desc->lock); + desc->handler->ack(vec); + desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs); + desc->handler->end(vec); + spin_unlock(&desc->lock); +} else if (vec != -1) { DBG("EE:0x%lx isrc: %d\n", regs->msr, vec); regs->entry_vector = vec; do_IRQ(regs); @@ -253,3 +262,24 @@ int ioapic_guest_write(unsigned long phy BUG_ON(val != val); return 0; } + +void send_IPI_mask(cpumask_t mask, int vector) +{ +unsigned int cpus; +int const bits = 8 * sizeof(cpus); + +switch(vector) { +case CALL_FUNCTION_VECTOR: +case EVENT_CHECK_VECTOR: + break; +default: + BUG(); + return; +} + +BUG_ON(NR_CPUS > bits); +BUG_ON(fls(mask.bits[0]) > bits); + +cpus = mask.bits[0]; +mpic_send_ipi(vector, cpus); +} diff -r ce8c1e26b2ae -r ea41ccaa8d77 xen/arch/powerpc/mpic.c --- a/xen/arch/powerpc/mpic.c Wed Nov 22 14:35:09 2006 -0500 +++ b/xen/arch/powerpc/mpic.c Wed Nov 22 14:53:13 2006 -0500 @@ -27,10 +27,6 @@ #define alloc_bootmem(x) xmalloc_bytes(x) -#define request_irq(irq, handler, f, devname, dev_id) \ -panic("IPI requested: %d: %p: %s: %p\n", irq, handler, devname, dev_id) - -typedef int irqreturn_t; #define IRQ_NONE (0) #define IRQ_HANDLED(1) @@ -96,11 +92,6 @@ typedef int irqreturn_t; #endif #include #include - -static inline void smp_message_recv(int msg, struct pt_regs *regs) -{ -return; -} #ifdef DEBUG #define DBG(fmt...) printk(fmt) diff -r ce8c1e26b2ae -r ea41ccaa8d77 xen/arch/powerpc/mpic_init.c --- a/xen/arch/powerpc/mpic_init.c Wed Nov 22 14:35:09 2006 -0500 +++ b/xen/arch/powerpc/mpic_init.c Wed Nov 22 14:53:13 2006 -0500 @@ -22,6 +22,7 @@ #include #include #include +#include #include "mpic_init.h" #include "oftree.h" #include "of-devtree.h" @@ -358,6 +359,42 @@ static struct hw_interrupt_type *share_m #endif +static unsigned int mpic_startup_ipi(unsigned int irq) +{ +mpic->hc_ipi.enable(irq); +return 0; +} + +int request_irq(unsigned int irq, + irqreturn_t (*handler)(int, void *, struct cpu_user_regs *), + unsigned long irqflags, const char * devname, void *dev_id) +{ +int retval; +struct irqaction *action; +void (*func)(int, void *, struct cpu_user_regs *); + +action = xmalloc(struct irqaction); +if (!action) { + BUG(); + return -ENOMEM; +} + +/* Xen's handler prototype is slightly different than Linux's. */ +func = (void (*)(int, void *, struct cpu_user_regs *))handler; + +action->handler = func; +action->name = devname; +action->dev_id = dev_id; + +retval = setup_irq(irq, action); +if (retval) { + BUG(); + xfree(action); +} + +return retval; +} + struct hw_interrupt_type *xen_mpic_init(struct hw_interrupt_type *xen_irq) { unsigned int isu_size; @@ -397,6 +434,11 @@ struct hw_interrupt_type *xen_mpic_init( hit = share_mpic(&mpic->hc_irq, xen_irq); printk("%s: success\n", __func__); + +