Re: SMP issue (Re: Status summary Was: [XenPPC] Cannot boot from local disk)

2006-11-28 Thread Amos Waterland
On Wed, Nov 29, 2006 at 03:14:54PM +0900, Kiyokuni Kawachiya wrote:
> > I can confirm that the JS20 of Kawachiya-san does not boot current
> > Xen/PPC without the nosmp option.  I got some time on the blade and
> > narrowed the point of failure down to somewhere in the synchronize_rcu
> > call that synchronize_net makes in dom0 Linux init.  I will work on
> > it tomorrow during JST night.
> 
> Thank you for the investigation.
> 
> Since no one else has complained about this, the problem (XenPPC does not
> boot with SMP) seems to be local to my setup.
> I am afraid I made some trivial mistake in setting up my JS20 blade.

I do not think you did anything wrong in setting up the blade.  It is a
JS20 model that Xen/PPC has not seen before, and we will have to find
out what is going wrong.




Re: SMP issue (Re: Status summary Was: [XenPPC] Cannot boot from local disk)

2006-11-28 Thread Kiyokuni Kawachiya
Amos,

> I can confirm that the JS20 of Kawachiya-san does not boot current
> Xen/PPC without the nosmp option.  I got some time on the blade and
> narrowed the point of failure down to somewhere in the synchronize_rcu
> call that synchronize_net makes in dom0 Linux init.  I will work on
> it tomorrow during JST night.

Thank you for the investigation.

Since no one else has complained about this, the problem (XenPPC does not
boot with SMP) seems to be local to my setup.
I am afraid I made some trivial mistake in setting up my JS20 blade.

Kiyokuni Kawachiya
IBM Tokyo Research Laboratory




Re: SMP issue (Re: Status summary Was: [XenPPC] Cannot boot from local disk)

2006-11-28 Thread Amos Waterland
I can confirm that the JS20 of Kawachiya-san does not boot current
Xen/PPC without the nosmp option.  I got some time on the blade and
narrowed the point of failure down to somewhere in the synchronize_rcu
call that synchronize_net makes in dom0 Linux init.  I will work on
it tomorrow during JST night.
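
For reference, the call path under suspicion looks roughly like this in
Linux trees of that era (a sketch from memory, not taken from the failing
tree, so treat the details as an assumption):

    /* dom0 init -> networking bring-up -> synchronize_net() */
    void synchronize_net(void)
    {
        might_sleep();
        /* Blocks until every online CPU passes through a quiescent
           state.  If a secondary CPU never schedules or takes timer
           ticks under Xen/PPC, this never returns -- consistent with
           a hang that appears only without the nosmp option. */
        synchronize_rcu();
    }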




[XenPPC] [xenppc-unstable] [XEN][POWERPC] cleanup hard tabs

2006-11-28 Thread Xen patchbot-xenppc-unstable
# HG changeset patch
# User Jimi Xenidis <[EMAIL PROTECTED]>
# Node ID bb5491a55606b88c86f380aae406f7077c3118bc
# Parent  2e909d6f2ab767fe5723a97e2f5413f876167296
[XEN][POWERPC] cleanup hard tabs
Hard tabs remain allowed in some files in order to track Linux lineage.
---
 xen/arch/powerpc/bitops.c |  124 +-
 xen/arch/powerpc/external.c   |2 
 xen/arch/powerpc/memory.c |   12 +--
 xen/arch/powerpc/of-devtree.h |   34 -
 xen/arch/powerpc/of_handler/console.c |   12 +--
 xen/arch/powerpc/papr/xlate.c |2 
 xen/arch/powerpc/rtas.c   |   14 +--
 xen/arch/powerpc/shadow.c |4 -
 xen/arch/powerpc/smp.c|   10 +-
 xen/arch/powerpc/smpboot.c|3 
 10 files changed, 108 insertions(+), 109 deletions(-)

diff -r 2e909d6f2ab7 -r bb5491a55606 xen/arch/powerpc/bitops.c
--- a/xen/arch/powerpc/bitops.c Tue Nov 28 18:46:13 2006 -0500
+++ b/xen/arch/powerpc/bitops.c Tue Nov 28 19:01:46 2006 -0500
@@ -12,42 +12,42 @@
  * @size: The maximum size to search
  */
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
-   unsigned long offset)
+unsigned long offset)
 {
-   const unsigned long *p = addr + BITOP_WORD(offset);
-   unsigned long result = offset & ~(BITS_PER_LONG-1);
-   unsigned long tmp;
+const unsigned long *p = addr + BITOP_WORD(offset);
+unsigned long result = offset & ~(BITS_PER_LONG-1);
+unsigned long tmp;
 
-   if (offset >= size)
-   return size;
-   size -= result;
-   offset %= BITS_PER_LONG;
-   if (offset) {
-   tmp = *(p++);
-   tmp &= (~0UL << offset);
-   if (size < BITS_PER_LONG)
-   goto found_first;
-   if (tmp)
-   goto found_middle;
-   size -= BITS_PER_LONG;
-   result += BITS_PER_LONG;
-   }
-   while (size & ~(BITS_PER_LONG-1)) {
-   if ((tmp = *(p++)))
-   goto found_middle;
-   result += BITS_PER_LONG;
-   size -= BITS_PER_LONG;
-   }
-   if (!size)
-   return result;
-   tmp = *p;
+if (offset >= size)
+return size;
+size -= result;
+offset %= BITS_PER_LONG;
+if (offset) {
+tmp = *(p++);
+tmp &= (~0UL << offset);
+if (size < BITS_PER_LONG)
+goto found_first;
+if (tmp)
+goto found_middle;
+size -= BITS_PER_LONG;
+result += BITS_PER_LONG;
+}
+while (size & ~(BITS_PER_LONG-1)) {
+if ((tmp = *(p++)))
+goto found_middle;
+result += BITS_PER_LONG;
+size -= BITS_PER_LONG;
+}
+if (!size)
+return result;
+tmp = *p;
 
 found_first:
-   tmp &= (~0UL >> (BITS_PER_LONG - size));
-   if (tmp == 0UL) /* Are any bits set? */
-   return result + size;   /* Nope. */
+tmp &= (~0UL >> (BITS_PER_LONG - size));
+if (tmp == 0UL)/* Are any bits set? */
+return result + size;/* Nope. */
 found_middle:
-   return result + __ffs(tmp);
+return result + __ffs(tmp);
 }
 
 /*
@@ -55,40 +55,40 @@ found_middle:
  * Linus' asm-alpha/bitops.h.
  */
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
-unsigned long offset)
+ unsigned long offset)
 {
-   const unsigned long *p = addr + BITOP_WORD(offset);
-   unsigned long result = offset & ~(BITS_PER_LONG-1);
-   unsigned long tmp;
+const unsigned long *p = addr + BITOP_WORD(offset);
+unsigned long result = offset & ~(BITS_PER_LONG-1);
+unsigned long tmp;
 
-   if (offset >= size)
-   return size;
-   size -= result;
-   offset %= BITS_PER_LONG;
-   if (offset) {
-   tmp = *(p++);
-   tmp |= ~0UL >> (BITS_PER_LONG - offset);
-   if (size < BITS_PER_LONG)
-   goto found_first;
-   if (~tmp)
-   goto found_middle;
-   size -= BITS_PER_LONG;
-   result += BITS_PER_LONG;
-   }
-   while (size & ~(BITS_PER_LONG-1)) {
-   if (~(tmp = *(p++)))
-   goto found_middle;
-   result += BITS_PER_LONG;
-   size -= BITS_PER_LONG;
-   }
-   if (!size)
-   return result;
-   tmp = *p;
+if (offset >= size)
+return size;
+size -= result;
+offset %= BITS_PER_LONG;
+if (offset) {
+tmp = *(p++);
+tmp |= ~0UL >> (BITS_PER_LONG - offset);
+if (size < BITS_PER_LONG)
+goto found_first;
+if (~tmp)
+goto found_middle;
+size -= BITS_PER_LONG;
+result += BITS_PER_LONG;
+}
+while (size & ~(BITS_PER_LONG-1)) {

[XenPPC] [xenppc-unstable] [XEN][POWERPC] Fix IPI stall timeout without using timebase_freq

2006-11-28 Thread Xen patchbot-xenppc-unstable
# HG changeset patch
# User Jimi Xenidis <[EMAIL PROTECTED]>
# Node ID 2e909d6f2ab767fe5723a97e2f5413f876167296
# Parent  e01e08ca629b4f154828b0976a58df8767558aec
[XEN][POWERPC] Fix IPI stall timeout without using timebase_freq

When using the register dump feature of Xen, one will sometimes see a
message about an IPI finish stall.  This is because of an int-to-long
comparison bug, so fix it by doing proper nanosecond-based time
accounting with no explicit reference to timebase_freq.

Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>
Signed-off-by: Jimi Xenidis <[EMAIL PROTECTED]>
---
 xen/arch/powerpc/smp.c |   39 +--
 1 files changed, 25 insertions(+), 14 deletions(-)

diff -r e01e08ca629b -r 2e909d6f2ab7 xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.cTue Nov 28 17:01:00 2006 -0500
+++ b/xen/arch/powerpc/smp.cTue Nov 28 18:46:13 2006 -0500
@@ -90,7 +90,8 @@ int on_selected_cpus(
 int retry,
 int wait)
 {
-int t, retval = 0, nr_cpus = cpus_weight(selected);
+int retval = 0, nr_cpus = cpus_weight(selected);
+unsigned long start, stall = SECONDS(1);
 
 spin_lock(&call_lock);
 
@@ -104,19 +105,21 @@ int on_selected_cpus(
 send_IPI_mask(selected, CALL_FUNCTION_VECTOR);
 
 /* We always wait for an initiation ACK from remote CPU.  */
-for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) {
-if (t && t % timebase_freq == 0) {
+for (start = NOW(); atomic_read(&call_data.started) != nr_cpus; ) {
+if (NOW() > start + stall) {
 printk("IPI start stall: %d ACKS to %d SYNS\n", 
atomic_read(&call_data.started), nr_cpus);
+   start = NOW();
 }
 }
 
 /* If told to, we wait for a completion ACK from remote CPU.  */
 if (wait) {
-for (t = 0; atomic_read(&call_data.finished) != nr_cpus; t++) {
-if (t > timebase_freq && t % timebase_freq == 0) {
+for (start = NOW(); atomic_read(&call_data.finished) != nr_cpus; ) {
+if (NOW() > start + stall) {
 printk("IPI finish stall: %d ACKS to %d SYNS\n", 
atomic_read(&call_data.finished), nr_cpus);
+start = NOW();
 }
 }
 }
@@ -168,6 +171,11 @@ void smp_message_recv(int msg, struct cp
 #ifdef DEBUG_IPI
 static void debug_ipi_ack(void *info)
 {
+if (info) {
+   unsigned long start, stall = SECONDS(5);
+   for (start = NOW(); NOW() < start + stall; );
+   printk("IPI recv on cpu #%d: %s\n", smp_processor_id(), (char *)info);
+}
 return;
 }
 
@@ -175,12 +183,12 @@ void ipi_torture_test(void)
 {
 int cpu;
 unsigned long before, after, delta;
-unsigned long min = ~0, max = 0, mean = 0, sum = 0, tick = 0;
+unsigned long min = ~0, max = 0, mean = 0, sum = 0, trials = 0;
 cpumask_t mask;
 
 cpus_clear(mask);
 
-while (tick < 100) {
+while (trials < 100) {
 for_each_online_cpu(cpu) {
 cpu_set(cpu, mask);
 before = mftb();
@@ -192,12 +200,15 @@ void ipi_torture_test(void)
 if (delta > max) max = delta;
 if (delta < min) min = delta;
 sum += delta;
-tick++;
-}
-}
-
-mean = sum / tick;
-
-printk("IPI tb ticks: min = %ld max = %ld mean = %ld\n", min, max, mean);
+trials++;
+}
+}
+
+mean = tb_to_ns(sum / trials);
+
+printk("IPI latency: min = %ld ticks, max = %ld ticks, mean = %ldns\n",
+  min, max, mean);
+
+smp_call_function(debug_ipi_ack, "Hi", 0, 1);
 }
 #endif



[XenPPC] [PATCH] Fix IPI stall timeout without using timebase_freq

2006-11-28 Thread Amos Waterland
When using the register dump feature of Xen, one will sometimes see a
message about an IPI finish stall.  This is because of an int-to-long
comparison bug, so fix it by doing proper nanosecond-based time
accounting with no explicit reference to timebase_freq.

Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>

---

 smp.c |   39 +--
 1 file changed, 25 insertions(+), 14 deletions(-)

diff -r e01e08ca629b xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.cTue Nov 28 17:01:00 2006 -0500
+++ b/xen/arch/powerpc/smp.cTue Nov 28 18:28:35 2006 -0500
@@ -90,7 +90,8 @@ int on_selected_cpus(
 int retry,
 int wait)
 {
-int t, retval = 0, nr_cpus = cpus_weight(selected);
+int retval = 0, nr_cpus = cpus_weight(selected);
+unsigned long start, stall = SECONDS(1);
 
 spin_lock(&call_lock);
 
@@ -104,19 +105,21 @@ int on_selected_cpus(
 send_IPI_mask(selected, CALL_FUNCTION_VECTOR);
 
 /* We always wait for an initiation ACK from remote CPU.  */
-for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) {
-if (t && t % timebase_freq == 0) {
+for (start = NOW(); atomic_read(&call_data.started) != nr_cpus; ) {
+if (NOW() > start + stall) {
 printk("IPI start stall: %d ACKS to %d SYNS\n", 
atomic_read(&call_data.started), nr_cpus);
+   start = NOW();
 }
 }
 
 /* If told to, we wait for a completion ACK from remote CPU.  */
 if (wait) {
-for (t = 0; atomic_read(&call_data.finished) != nr_cpus; t++) {
-if (t > timebase_freq && t % timebase_freq == 0) {
+for (start = NOW(); atomic_read(&call_data.finished) != nr_cpus; ) {
+if (NOW() > start + stall) {
 printk("IPI finish stall: %d ACKS to %d SYNS\n", 
atomic_read(&call_data.finished), nr_cpus);
+start = NOW();
 }
 }
 }
@@ -168,6 +171,11 @@ void smp_message_recv(int msg, struct cp
 #ifdef DEBUG_IPI
 static void debug_ipi_ack(void *info)
 {
+if (info) {
+   unsigned long start, stall = SECONDS(5);
+   for (start = NOW(); NOW() < start + stall; );
+   printk("IPI recv on cpu #%d: %s\n", smp_processor_id(), (char *)info);
+}
 return;
 }
 
@@ -175,12 +183,12 @@ void ipi_torture_test(void)
 {
 int cpu;
 unsigned long before, after, delta;
-unsigned long min = ~0, max = 0, mean = 0, sum = 0, tick = 0;
+unsigned long min = ~0, max = 0, mean = 0, sum = 0, trials = 0;
 cpumask_t mask;
 
 cpus_clear(mask);
 
-while (tick < 100) {
+while (trials < 100) {
 for_each_online_cpu(cpu) {
 cpu_set(cpu, mask);
 before = mftb();
@@ -192,12 +200,15 @@ void ipi_torture_test(void)
 if (delta > max) max = delta;
 if (delta < min) min = delta;
 sum += delta;
-tick++;
-}
-}
-
-mean = sum / tick;
-
-printk("IPI tb ticks: min = %ld max = %ld mean = %ld\n", min, max, mean);
+trials++;
+}
+}
+
+mean = tb_to_ns(sum / trials);
+
+printk("IPI latency: min = %ld ticks, max = %ld ticks, mean = %ldns\n",
+  min, max, mean);
+
+smp_call_function(debug_ipi_ack, "Hi", 0, 1);
 }
 #endif



Re: [XenPPC] [PATCH] Fix IPI stall timeout

2006-11-28 Thread Jimi Xenidis


On Nov 28, 2006, at 5:55 PM, Amos Waterland wrote:


When using the register dump feature of Xen, one will sometimes see a
message about an IPI finish stall.  This is because of an int-to-long
comparison bug, so fix it by doing proper nanosecond-based time
accounting.


As a side note, our IPI remote function call latency of completion on
a JS21 blade is: min = 34 ticks, max = 119 ticks, mean = 2691ns.

Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>

---

 smp.c |   39 +--
 1 file changed, 25 insertions(+), 14 deletions(-)

diff -r e01e08ca629b xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.cTue Nov 28 17:01:00 2006 -0500
+++ b/xen/arch/powerpc/smp.cTue Nov 28 17:40:50 2006 -0500
@@ -90,7 +90,8 @@ int on_selected_cpus(
 int retry,
 int wait)
 {
-int t, retval = 0, nr_cpus = cpus_weight(selected);
+int retval = 0, nr_cpus = cpus_weight(selected);
+unsigned long start, stall = tb_to_ns(timebase_freq);


Since you are using NOW(), this should have nothing to do with the
timebase, so it should be:
  unsigned long start, stall = SECONDS(1);





[XenPPC] [PATCH] Fix IPI stall timeout

2006-11-28 Thread Amos Waterland
When using the register dump feature of Xen, one will sometimes see a
message about an IPI finish stall.  This is because of an int-to-long
comparison bug, so fix it by doing proper nanosecond-based time accounting.

As a side note, our IPI remote function call latency of completion on 
a JS21 blade is: min = 34 ticks, max = 119 ticks, mean = 2691ns.
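
To make the comparison hazard concrete, a minimal before/after sketch
(done() stands in for the atomic_read() ACK checks and is hypothetical;
the SECONDS(1) form is the one this thread converges on):

    /* Before: t is a plain int counting loop iterations at memory
       speed, so it wraps at 2^31, and it is compared against the
       unsigned long timebase_freq -- the stall report fires at best
       unpredictably. */
    int t;
    for (t = 0; !done(); t++)
        if (t && t % timebase_freq == 0)
            printk("stall\n");

    /* After: measure elapsed time in nanoseconds instead of counting
       iterations. */
    unsigned long start, stall = SECONDS(1);
    for (start = NOW(); !done(); )
        if (NOW() > start + stall) {
            printk("stall\n");
            start = NOW();
        }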

Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>
 
---

 smp.c |   39 +--
 1 file changed, 25 insertions(+), 14 deletions(-)

diff -r e01e08ca629b xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.cTue Nov 28 17:01:00 2006 -0500
+++ b/xen/arch/powerpc/smp.cTue Nov 28 17:40:50 2006 -0500
@@ -90,7 +90,8 @@ int on_selected_cpus(
 int retry,
 int wait)
 {
-int t, retval = 0, nr_cpus = cpus_weight(selected);
+int retval = 0, nr_cpus = cpus_weight(selected);
+unsigned long start, stall = tb_to_ns(timebase_freq);
 
 spin_lock(&call_lock);
 
@@ -104,19 +105,21 @@ int on_selected_cpus(
 send_IPI_mask(selected, CALL_FUNCTION_VECTOR);
 
 /* We always wait for an initiation ACK from remote CPU.  */
-for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) {
-if (t && t % timebase_freq == 0) {
+for (start = NOW(); atomic_read(&call_data.started) != nr_cpus; ) {
+if (NOW() > start + stall) {
 printk("IPI start stall: %d ACKS to %d SYNS\n", 
atomic_read(&call_data.started), nr_cpus);
+   start = NOW();
 }
 }
 
 /* If told to, we wait for a completion ACK from remote CPU.  */
 if (wait) {
-for (t = 0; atomic_read(&call_data.finished) != nr_cpus; t++) {
-if (t > timebase_freq && t % timebase_freq == 0) {
+for (start = NOW(); atomic_read(&call_data.finished) != nr_cpus; ) {
+if (NOW() > start + stall) {
 printk("IPI finish stall: %d ACKS to %d SYNS\n", 
atomic_read(&call_data.finished), nr_cpus);
+start = NOW();
 }
 }
 }
@@ -168,6 +171,11 @@ void smp_message_recv(int msg, struct cp
 #ifdef DEBUG_IPI
 static void debug_ipi_ack(void *info)
 {
+if (info) {
+   unsigned long start, stall = tb_to_ns(5 * timebase_freq);
+   for (start = NOW(); NOW() < start + stall; );
+   printk("IPI recv on cpu #%d: %s\n", smp_processor_id(), (char *)info);
+}
 return;
 }
 
@@ -175,12 +183,12 @@ void ipi_torture_test(void)
 {
 int cpu;
 unsigned long before, after, delta;
-unsigned long min = ~0, max = 0, mean = 0, sum = 0, tick = 0;
+unsigned long min = ~0, max = 0, mean = 0, sum = 0, trials = 0;
 cpumask_t mask;
 
 cpus_clear(mask);
 
-while (tick < 100) {
+while (trials < 100) {
 for_each_online_cpu(cpu) {
 cpu_set(cpu, mask);
 before = mftb();
@@ -192,12 +200,15 @@ void ipi_torture_test(void)
 if (delta > max) max = delta;
 if (delta < min) min = delta;
 sum += delta;
-tick++;
-}
-}
-
-mean = sum / tick;
-
-printk("IPI tb ticks: min = %ld max = %ld mean = %ld\n", min, max, mean);
+trials++;
+}
+}
+
+mean = tb_to_ns(sum / trials);
+
+printk("IPI latency: min = %ld ticks, max = %ld ticks, mean = %ldns\n",
+  min, max, mean);
+
+smp_call_function(debug_ipi_ack, "Hi", 0, 1);
 }
 #endif



[XenPPC] [xenppc-unstable] [XEN][POWERPC] Do not override smp function call wait flag

2006-11-28 Thread Xen patchbot-xenppc-unstable
# HG changeset patch
# User Jimi Xenidis <[EMAIL PROTECTED]>
# Node ID e01e08ca629b4f154828b0976a58df8767558aec
# Parent  1e1a63408129bea2d87f485c52f1be21ada35ff0
[XEN][POWERPC] Do not override smp function call wait flag
Do not override the caller's wishes regarding waiting for smp function
call completion.  I was being too conservative in this respect: the lock
protects the call_data structure, and the function called is expected to
be thread-safe.

Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>
Signed-off-by: Jimi Xenidis <[EMAIL PROTECTED]>
---
 xen/arch/powerpc/smp.c |1 -
 1 files changed, 1 deletion(-)

diff -r 1e1a63408129 -r e01e08ca629b xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.cTue Nov 28 16:56:40 2006 -0500
+++ b/xen/arch/powerpc/smp.cTue Nov 28 17:01:00 2006 -0500
@@ -97,7 +97,6 @@ int on_selected_cpus(
 call_data.func = func;
 call_data.info = info;
 call_data.wait = wait;
-call_data.wait = 1;  /* Until we get RCU around call_data.  */
 atomic_set(&call_data.started, 0);
 atomic_set(&call_data.finished, 0);
 mb();



[XenPPC] [xenppc-unstable] [XEN][POWERPC] Generalized parallel IPI handlers

2006-11-28 Thread Jimi Xenidis


# HG changeset patch
# User Jimi Xenidis <[EMAIL PROTECTED]>
# Node ID 1e1a63408129bea2d87f485c52f1be21ada35ff0
# Parent  a2a4a6bdc5cdedf3fe90626de9b16c9ee898f178
[XEN][POWERPC] Generalized parallel IPI handlers

Our problem with x86 do_IRQ is that it does not respect IRQ_PER_CPU, so
make our logic reflect that generically.  We remove the spin lock in
this case, since the programming convention is to update irq descs
atomically (if at all).  This patch allows read_clocks to work properly.

Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>
Signed-off-by: Jimi Xenidis <[EMAIL PROTECTED]>
---
 xen/arch/powerpc/external.c   |6 ++
 xen/arch/powerpc/mpic_init.c  |6 --
 xen/include/asm-powerpc/smp.h |1 -
 3 files changed, 2 insertions(+), 11 deletions(-)

diff -r a2a4a6bdc5cd -r 1e1a63408129 xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c   Tue Nov 28 16:43:53 2006 -0500
+++ b/xen/arch/powerpc/external.c   Tue Nov 28 16:56:40 2006 -0500
@@ -82,15 +82,13 @@ void do_external(struct cpu_user_regs *r

 vec = xen_mpic_get_irq(regs);

-if (vector_is_ipi(vec)) {
-/* do_IRQ is fundamentally broken for reliable IPI delivery.  */
+if (irq_desc[vec].status & IRQ_PER_CPU) {
+   /* x86 do_IRQ does not respect the per cpu flag.  */
 irq_desc_t *desc = &irq_desc[vec];
 regs->entry_vector = vec;
-spin_lock(&desc->lock);
 desc->handler->ack(vec);
 desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs);
 desc->handler->end(vec);
-spin_unlock(&desc->lock);
 } else if (vec != -1) {
 DBG("EE:0x%lx isrc: %d\n", regs->msr, vec);
 regs->entry_vector = vec;
diff -r a2a4a6bdc5cd -r 1e1a63408129 xen/arch/powerpc/mpic_init.c
--- a/xen/arch/powerpc/mpic_init.c  Tue Nov 28 16:43:53 2006 -0500
+++ b/xen/arch/powerpc/mpic_init.c  Tue Nov 28 16:56:40 2006 -0500
@@ -448,9 +448,3 @@ int xen_mpic_get_irq(struct cpu_user_reg

return mpic_get_one_irq(mpic, regs);
 }
-
-int vector_is_ipi(int vector)
-{
-BUG_ON(!mpic);
-return (mpic->ipi_offset <= vector) && (vector < mpic->ipi_offset + 4);
-}
diff -r a2a4a6bdc5cd -r 1e1a63408129 xen/include/asm-powerpc/smp.h
--- a/xen/include/asm-powerpc/smp.h Tue Nov 28 16:43:53 2006 -0500
+++ b/xen/include/asm-powerpc/smp.h Tue Nov 28 16:56:40 2006 -0500
@@ -51,7 +51,6 @@ void smp_call_function_interrupt(struct
 void smp_call_function_interrupt(struct cpu_user_regs *regs);
 void smp_event_check_interrupt(void);
 void send_IPI_mask(cpumask_t mask, int vector);
-int vector_is_ipi(int vector);

 #undef DEBUG_IPI
 #ifdef DEBUG_IPI





Re: [XenPPC] [PATCH] Do not override smp function call wait flag

2006-11-28 Thread Jimi Xenidis

pushed, thanks
On Nov 28, 2006, at 4:34 PM, Amos Waterland wrote:


Do not override the caller's wishes regarding waiting for smp function
call completion.  I was being too conservative in this respect: the lock
protects the call_data structure, and the function called is expected to
be thread-safe.

Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>

---

 smp.c |1 -
 1 file changed, 1 deletion(-)

diff -r a8e67a19c325 xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.cTue Nov 28 10:33:53 2006 -0500
+++ b/xen/arch/powerpc/smp.cTue Nov 28 16:28:10 2006 -0500
@@ -97,7 +97,6 @@ int on_selected_cpus(
 call_data.func = func;
 call_data.info = info;
 call_data.wait = wait;
-call_data.wait = 1;  /* Until we get RCU around call_data.  */
 atomic_set(&call_data.started, 0);
 atomic_set(&call_data.finished, 0);
 mb();



Re: [XenPPC] [PATCH] Generalized parallel IPI handlers

2006-11-28 Thread Jimi Xenidis

pushed, thanks!
-JX
On Nov 28, 2006, at 4:27 PM, Amos Waterland wrote:

Our problem with x86 do_IRQ is that it does not respect IRQ_PER_CPU, so
make our logic reflect that generically.  We remove the spin lock in
this case, since the programming convention is to update irq descs
atomically (if at all).  This patch allows read_clocks to work properly.


Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>

---

 arch/powerpc/external.c   |6 ++
 arch/powerpc/mpic_init.c  |6 --
 include/asm-powerpc/smp.h |1 -
 3 files changed, 2 insertions(+), 11 deletions(-)

diff -r a8e67a19c325 xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c   Tue Nov 28 10:33:53 2006 -0500
+++ b/xen/arch/powerpc/external.c   Tue Nov 28 16:14:27 2006 -0500
@@ -82,15 +82,13 @@ void do_external(struct cpu_user_regs *r

 vec = xen_mpic_get_irq(regs);

-if (vector_is_ipi(vec)) {
-/* do_IRQ is fundamentally broken for reliable IPI delivery.  */
+if (irq_desc[vec].status & IRQ_PER_CPU) {
+   /* x86 do_IRQ does not respect the per cpu flag.  */
 irq_desc_t *desc = &irq_desc[vec];
 regs->entry_vector = vec;
-spin_lock(&desc->lock);
 desc->handler->ack(vec);
 desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs);
 desc->handler->end(vec);
-spin_unlock(&desc->lock);
 } else if (vec != -1) {
 DBG("EE:0x%lx isrc: %d\n", regs->msr, vec);
 regs->entry_vector = vec;
diff -r a8e67a19c325 xen/arch/powerpc/mpic_init.c
--- a/xen/arch/powerpc/mpic_init.c  Tue Nov 28 10:33:53 2006 -0500
+++ b/xen/arch/powerpc/mpic_init.c  Tue Nov 28 16:14:15 2006 -0500
@@ -448,9 +448,3 @@ int xen_mpic_get_irq(struct cpu_user_reg

return mpic_get_one_irq(mpic, regs);
 }
-
-int vector_is_ipi(int vector)
-{
-BUG_ON(!mpic);
-return (mpic->ipi_offset <= vector) && (vector < mpic->ipi_offset + 4);
-}
diff -r a8e67a19c325 xen/include/asm-powerpc/smp.h
--- a/xen/include/asm-powerpc/smp.h Tue Nov 28 10:33:53 2006 -0500
+++ b/xen/include/asm-powerpc/smp.h Tue Nov 28 16:14:19 2006 -0500
@@ -51,7 +51,6 @@ void smp_call_function_interrupt(struct
 void smp_call_function_interrupt(struct cpu_user_regs *regs);
 void smp_event_check_interrupt(void);
 void send_IPI_mask(cpumask_t mask, int vector);
-int vector_is_ipi(int vector);

 #undef DEBUG_IPI
 #ifdef DEBUG_IPI



[XenPPC] [PATCH] Do not override smp function call wait flag

2006-11-28 Thread Amos Waterland
Do not override the caller's wishes regarding waiting for smp function
call completion.  I was being too conservative in this respect: the lock
protects the call_data structure, and the function called is expected to
be thread-safe.
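
For callers, the effect is as follows (a hedged usage sketch; the argument
order follows on_selected_cpus() in xen/arch/powerpc/smp.c, and dump_fn is
a hypothetical handler):

    /* Handlers must be thread-safe; they may run on all CPUs at once. */
    static void dump_fn(void *info) { /* ... */ }

    /* wait = 0: return once every target CPU has ACKed receipt of the
       IPI (call_data.started); handlers may still be running.
       wait = 1: additionally spin until every handler has completed
       (call_data.finished).  Before this patch, wait was forced to 1. */
    on_selected_cpus(cpu_online_map, dump_fn, NULL, 0 /* retry */,
                     0 /* wait -- honored after this patch */);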

Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>

---

 smp.c |1 -
 1 file changed, 1 deletion(-)

diff -r a8e67a19c325 xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.cTue Nov 28 10:33:53 2006 -0500
+++ b/xen/arch/powerpc/smp.cTue Nov 28 16:28:10 2006 -0500
@@ -97,7 +97,6 @@ int on_selected_cpus(
 call_data.func = func;
 call_data.info = info;
 call_data.wait = wait;
-call_data.wait = 1;  /* Until we get RCU around call_data.  */
 atomic_set(&call_data.started, 0);
 atomic_set(&call_data.finished, 0);
 mb();



[XenPPC] [PATCH] Generalized parallel IPI handlers

2006-11-28 Thread Amos Waterland
Our problem with x86 do_IRQ is that it does not respect IRQ_PER_CPU, so
make our logic reflect that generically.  We remove the spin lock in
this case, since the programming convention is to update irq descs
atomically (if at all).  This patch allows read_clocks to work properly.
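
The convention this depends on, in sketch form (the setup site shown is
hypothetical; the status flag and the dispatch test follow the diff below):

    /* An IPI vector's descriptor is marked per-CPU when it is set up,
       somewhere in the MPIC/IPI init path: */
    irq_desc[ipi_vector].status |= IRQ_PER_CPU;

    /* do_external() then takes the lock-free branch for such vectors,
       so IPI handlers on different CPUs run in parallel; ordinary
       external interrupts still go through the locked do_IRQ path. */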

Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>

---

 arch/powerpc/external.c   |6 ++
 arch/powerpc/mpic_init.c  |6 --
 include/asm-powerpc/smp.h |1 -
 3 files changed, 2 insertions(+), 11 deletions(-)

diff -r a8e67a19c325 xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c   Tue Nov 28 10:33:53 2006 -0500
+++ b/xen/arch/powerpc/external.c   Tue Nov 28 16:14:27 2006 -0500
@@ -82,15 +82,13 @@ void do_external(struct cpu_user_regs *r
 
 vec = xen_mpic_get_irq(regs);
 
-if (vector_is_ipi(vec)) {
-/* do_IRQ is fundamentally broken for reliable IPI delivery.  */
+if (irq_desc[vec].status & IRQ_PER_CPU) {
+   /* x86 do_IRQ does not respect the per cpu flag.  */
 irq_desc_t *desc = &irq_desc[vec];
 regs->entry_vector = vec;
-spin_lock(&desc->lock);
 desc->handler->ack(vec);
 desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs);
 desc->handler->end(vec);
-spin_unlock(&desc->lock);
 } else if (vec != -1) {
 DBG("EE:0x%lx isrc: %d\n", regs->msr, vec);
 regs->entry_vector = vec;
diff -r a8e67a19c325 xen/arch/powerpc/mpic_init.c
--- a/xen/arch/powerpc/mpic_init.c  Tue Nov 28 10:33:53 2006 -0500
+++ b/xen/arch/powerpc/mpic_init.c  Tue Nov 28 16:14:15 2006 -0500
@@ -448,9 +448,3 @@ int xen_mpic_get_irq(struct cpu_user_reg
 
return mpic_get_one_irq(mpic, regs);
 }
-
-int vector_is_ipi(int vector)
-{
-BUG_ON(!mpic);
-return (mpic->ipi_offset <= vector) && (vector < mpic->ipi_offset + 4);
-}
diff -r a8e67a19c325 xen/include/asm-powerpc/smp.h
--- a/xen/include/asm-powerpc/smp.h Tue Nov 28 10:33:53 2006 -0500
+++ b/xen/include/asm-powerpc/smp.h Tue Nov 28 16:14:19 2006 -0500
@@ -51,7 +51,6 @@ void smp_call_function_interrupt(struct 
 void smp_call_function_interrupt(struct cpu_user_regs *regs);
 void smp_event_check_interrupt(void);
 void send_IPI_mask(cpumask_t mask, int vector);
-int vector_is_ipi(int vector);
 
 #undef DEBUG_IPI
 #ifdef DEBUG_IPI



[XenPPC] [PATCH] Relax IPI stall timeout

2006-11-28 Thread Amos Waterland
When using the register dump feature of Xen, one will often see a
message about an IPI finish stall.  This is because we expected IPI
handlers to run very quickly, but in this case, the handler is doing a
lot of console I/O in order to print the register contents.  So relax
the stall timeout.
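
One arithmetic caveat with this interim fix, which the nanosecond-based
rework later in this thread removes entirely (a sketch, not part of the
patch):

    /* If timebase_freq exceeds INT_MAX / 10 (about 214 million), the
       multiplication wraps -- and t itself also wraps at 2^31, since
       it counts loop iterations at memory speed: */
    int stall = timebase_freq * 10;

    /* A wider type avoids the multiply overflow, though not the
       iteration-counting problem: */
    unsigned long stall_ul = (unsigned long)timebase_freq * 10;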

Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>

---

 smp.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff -r a8e67a19c325 xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.cTue Nov 28 10:33:53 2006 -0500
+++ b/xen/arch/powerpc/smp.cTue Nov 28 13:29:50 2006 -0500
@@ -91,6 +91,7 @@ int on_selected_cpus(
 int wait)
 {
 int t, retval = 0, nr_cpus = cpus_weight(selected);
+int stall = timebase_freq * 10;
 
 spin_lock(&call_lock);
 
@@ -106,7 +107,7 @@ int on_selected_cpus(
 
 /* We always wait for an initiation ACK from remote CPU.  */
 for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) {
-if (t && t % timebase_freq == 0) {
+if (t > 0 && t % stall == 0) {
 printk("IPI start stall: %d ACKS to %d SYNS\n", 
atomic_read(&call_data.started), nr_cpus);
 }
@@ -115,7 +116,7 @@ int on_selected_cpus(
 /* If told to, we wait for a completion ACK from remote CPU.  */
 if (wait) {
 for (t = 0; atomic_read(&call_data.finished) != nr_cpus; t++) {
-if (t > timebase_freq && t % timebase_freq == 0) {
+if (t > 0 && t % stall == 0) {
 printk("IPI finish stall: %d ACKS to %d SYNS\n", 
atomic_read(&call_data.finished), nr_cpus);
 }



[XenPPC] [PATCH] Make IPI handlers run in parallel

2006-11-28 Thread Amos Waterland
Make a copy of the vector information while protected by a lock, then
release the lock and use that information to acknowledge and act on the IPI.
We always have a consistent set of information in this case, as opposed
to the case of dropping and reacquiring the lock.

This patch makes read_clocks work properly.
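
The tradeoff in miniature (a sketch; the fields are as in the diff below):

    /* Copy under the lock ... */
    spin_lock(&desc->lock);
    void (*ack)(unsigned int) = desc->handler->ack;  /* one of the trio */
    spin_unlock(&desc->lock);
    /* ... act outside it.  A concurrent reprogram of the descriptor can
       no longer hand us a mismatched ack/handler/end trio, only a
       uniformly stale one -- and the handler now runs unserialized. */
    ack(vec);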

Signed-off-by: Amos Waterland <[EMAIL PROTECTED]>

---

 external.c |   18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff -r a8e67a19c325 xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c   Tue Nov 28 10:33:53 2006 -0500
+++ b/xen/arch/powerpc/external.c   Tue Nov 28 13:28:18 2006 -0500
@@ -84,13 +84,25 @@ void do_external(struct cpu_user_regs *r
 
 if (vector_is_ipi(vec)) {
 /* do_IRQ is fundamentally broken for reliable IPI delivery.  */
+void (*ack)(unsigned int irq);
+void (*handler)(int, void *, struct cpu_user_regs *);
+void (*end)(unsigned int irq);
+int irq;
+void *dev_id;
 irq_desc_t *desc = &irq_desc[vec];
 regs->entry_vector = vec;
+
 spin_lock(&desc->lock);
-desc->handler->ack(vec);
-desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs);
-desc->handler->end(vec);
+ack = desc->handler->ack;
+handler = desc->action->handler;
+end = desc->handler->end;
+irq = vector_to_irq(vec);
+dev_id = desc->action->dev_id;
 spin_unlock(&desc->lock);
+
+ack(vec);
+handler(irq, dev_id, regs);
+end(vec);
 } else if (vec != -1) {
 DBG("EE:0x%lx isrc: %d\n", regs->msr, vec);
 regs->entry_vector = vec;



Re: [XenPPC] multicast function invocations

2006-11-28 Thread Jimi Xenidis


On Nov 28, 2006, at 3:17 AM, Amos Waterland wrote:


To summarize the situation, I found two problems.

1. Core Xen has a bug (I believe) in which they do not mark their cpu
   mask volatile, so the compiler generates an infinite loop in read_clocks.

   I will send some patches upstream to resolve this issue.

This was a PPC-specific bug, covered by:
# HG changeset patch
# User Jimi Xenidis <[EMAIL PROTECTED]>
# Node ID a8e67a19c3255a6bbc0aaa5c9e1736f1ddd76f6c
# Parent  cc45282daf3d242fdcf6e858c0b18b7f1086a318
cpu_relax() needs to call barrier()



2. Xen/PPC has a problem in that its IPI callbacks (remote function
   invocations) do not actually happen in parallel, which breaks the
   design of read_clocks.  Our IPI callbacks are serialized by the
   design we copied from Xen/x86, which is to acquire a per-vector lock
   very early in the EE handling path (see do_external).

   I guess my real question is: will Xen/PPC ever in the future run its
   IPI remote function callbacks with EE enabled?  If the plan is to
   keep things the way they are now, then we should remove the
   per-vector lock entirely.


The lock protects the vector handler, which theoretically could be
reprogrammed.  However, we should not be calling the action with the
lock held unless it is per-cpu.




The following is a patch that implements the above two conclusions and
which allows 'C-aC-aC-at' to work properly.  Comments?

---

 arch/powerpc/external.c |2 --
 common/keyhandler.c |2 +-
 include/xen/cpumask.h   |2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

diff -r 305751a5281e xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c   Wed Nov 22 16:29:25 2006 -0500
+++ b/xen/arch/powerpc/external.c   Tue Nov 28 03:07:10 2006 -0500
@@ -86,11 +86,9 @@ void do_external(struct cpu_user_regs *r
 /* do_IRQ is fundamentally broken for reliable IPI delivery.  */
 irq_desc_t *desc = &irq_desc[vec];
 regs->entry_vector = vec;
-spin_lock(&desc->lock);
 desc->handler->ack(vec);
 desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs);
 desc->handler->end(vec);
-spin_unlock(&desc->lock);


Please add a:
   BUG_ON(!(desc->status & IRQ_PER_CPU));

If you trip this then we will need:
 - locking around the desc
 - tickle some status bits
 - unlock the action
 - lock again
 - tickle status bits.
...





Re: [XenPPC] [xenppc-unstable] [XEN][POWERPC] SMP/IPI/MB combined

2006-11-28 Thread Amos Waterland
On Tue, Nov 28, 2006 at 10:47:31AM -0500, Jimi Xenidis wrote:
> On Nov 28, 2006, at 1:37 AM, Amos Waterland wrote:
> > The compiler is not told that read_clocks_cpumask is volatile, so it
> > is free to turn that loop into an infinite loop, as my gcc 4.1.1
> > cross-compiler does.  I am surprised that other Xen architectures
> > have not seen the same problem.
> 
> Found it, cpu_relax() is supposed to contain barrier() call.
> My fault but it coulda been hollis :)

Excellent, thanks.

> you _always_ have to wait for call_data.started; it means that it's
> safe to reuse call_data.

Yes, I know :)  I only did that as a crude approximation; the proper
solution (pending your design comments) is not serializing do_external
with a lock (see the next patch I sent).




Re: [XenPPC] Crash on js12

2006-11-28 Thread Jimi Xenidis


On Nov 28, 2006, at 2:24 AM, Amos Waterland wrote:


On Mon, Nov 27, 2006 at 12:04:36PM -0500, Maria Butrico wrote:

This problem is solved on the latest version of Xen.


I do not think the problem is completely solved.  I am still seeing
repeated prints like this:

 (XEN) CPU[PIR:1 IPI:1 Logical:1] Hello World!CPU[PIR:1 IPI:1 Logical:1]
 Hello World!
 (XEN) :1] Hello World!Got ack


Yeah, I noticed this too; it is not related to the OF problems Maria
was reporting, but this _is_ odd.


-JX





[XenPPC] [xenppc-unstable] cpu_relax() needs to call barrier()

2006-11-28 Thread Xen patchbot-xenppc-unstable
# HG changeset patch
# User Jimi Xenidis <[EMAIL PROTECTED]>
# Node ID a8e67a19c3255a6bbc0aaa5c9e1736f1ddd76f6c
# Parent  cc45282daf3d242fdcf6e858c0b18b7f1086a318
cpu_relax() needs to call barrier()

Signed-off-by: Jimi Xenidis <[EMAIL PROTECTED]>
---
 xen/include/asm-powerpc/processor.h |3 ++-
 1 files changed, 2 insertions(+), 1 deletion(-)

diff -r cc45282daf3d -r a8e67a19c325 xen/include/asm-powerpc/processor.h
--- a/xen/include/asm-powerpc/processor.h   Mon Nov 27 17:17:07 2006 -0500
+++ b/xen/include/asm-powerpc/processor.h   Tue Nov 28 10:33:53 2006 -0500
@@ -152,7 +152,8 @@ static inline void nop(void) {
 static inline void nop(void) {
 __asm__ __volatile__ ("nop");
 }
-#define cpu_relax() nop()
+/* will need to address thread priorities when we go SMT */
+#define cpu_relax() barrier()
 
 static inline unsigned int mfpir(void)
 {
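
Why the barrier matters, in sketch form (flag is a hypothetical variable;
barrier() is the usual GCC memory clobber):

    /* barrier() forbids the compiler from caching memory across it: */
    #define barrier() __asm__ __volatile__ ("" : : : "memory")

    /* With cpu_relax() expanding to a plain nop, gcc may load flag once
       and spin on a register forever; with barrier(), flag is re-read
       on every iteration. */
    while (!flag)
        cpu_relax();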



Re: [XenPPC] [xenppc-unstable] [XEN][POWERPC] SMP/IPI/MB combined

2006-11-28 Thread Jimi Xenidis


On Nov 28, 2006, at 1:37 AM, Amos Waterland wrote:


This will have to be reworked and broken up into individual parts for
submission, but here is what is necessary to make 'C-a C-a C-a t' work
properly.

Jimi, when you disassemble xen-syms compiled without this patch, do you
see a bogus infinite loop in read_clocks?

looked briefly but did not notice it.


  The compiler is not told that
read_clocks_cpumask is volatile, so it is free to turn that loop into an
infinite loop, as my gcc 4.1.1 cross-compiler does.  I am surprised that
other Xen architectures have not seen the same problem.


Found it: cpu_relax() is supposed to contain a barrier() call.
My fault but it coulda been hollis :)

diff -r cc45282daf3d xen/include/asm-powerpc/processor.h
--- a/xen/include/asm-powerpc/processor.h   Mon Nov 27 17:17:07 2006 -0500
+++ b/xen/include/asm-powerpc/processor.h   Tue Nov 28 10:19:09 2006 -0500
@@ -152,7 +152,7 @@ static inline void nop(void) {
static inline void nop(void) {
 __asm__ __volatile__ ("nop");
}
-#define cpu_relax() nop()
+#define cpu_relax() barrier()
static inline unsigned int mfpir(void)
{

this will solve the volatile issue, and I am pushing this now.

more below..





 arch/powerpc/smp.c|   18 ++
 common/keyhandler.c   |2 +-
 include/xen/cpumask.h |2 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

diff -r 305751a5281e xen/arch/powerpc/smp.c
--- a/xen/arch/powerpc/smp.cWed Nov 22 16:29:25 2006 -0500
+++ b/xen/arch/powerpc/smp.cTue Nov 28 00:45:24 2006 -0500
@@ -91,31 +91,33 @@ int on_selected_cpus(
 int wait)
 {
 int t, retval = 0, nr_cpus = cpus_weight(selected);
+int stall = timebase_freq * 10;

 spin_lock(&call_lock);

 call_data.func = func;
 call_data.info = info;
 call_data.wait = wait;
-call_data.wait = 1;  /* Until we get RCU around call_data.  */
 atomic_set(&call_data.started, 0);
 atomic_set(&call_data.finished, 0);
 mb();

 send_IPI_mask(selected, CALL_FUNCTION_VECTOR);

-/* We always wait for an initiation ACK from remote CPU.  */
-for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) {
-if (t && t % timebase_freq == 0) {
-printk("IPI start stall: %d ACKS to %d SYNS\n",
-   atomic_read(&call_data.started), nr_cpus);
-}
+/* If told to, we wait for an initiation ACK from remote CPU.  */
+if (wait) {
+   for (t = 0; atomic_read(&call_data.started) != nr_cpus; t++) {
+   if (t > 0 && t % stall == 0) {
+   printk("IPI start stall: %d ACKS to %d SYNS\n",
+  atomic_read(&call_data.started), nr_cpus);
+   }
+   }


you _always_ have to wait for call_data.started; it means that it's
safe to reuse call_data.



 }

 /* If told to, we wait for a completion ACK from remote CPU.  */
 if (wait) {
 for (t = 0; atomic_read(&call_data.finished) != nr_cpus; t++) {
-if (t > timebase_freq && t % timebase_freq == 0) {
+if (t > 0 && t % stall == 0) {
 printk("IPI finish stall: %d ACKS to %d SYNS\n",
atomic_read(&call_data.finished), nr_cpus);
 }
diff -r 305751a5281e xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Wed Nov 22 16:29:25 2006 -0500
+++ b/xen/common/keyhandler.c   Tue Nov 28 00:12:14 2006 -0500
@@ -193,7 +193,7 @@ static void dump_domains(unsigned char k
 read_unlock(&domlist_lock);
 }





[XenPPC] multicast function invocations

2006-11-28 Thread Amos Waterland
To summarize the situation, I found two problems.

1. Core Xen has a bug (I believe) in which they do not mark their cpu
   mask volatile, so the compiler generates an infinite loop in read_clocks.

   I will send some patches upstream to resolve this issue.

2. Xen/PPC has a problem in that its IPI callbacks (remote function 
   invocations) do not actually happen in parallel, which breaks the
   design of read_clocks.  Our IPI callbacks are serialized by the
   design we copied from Xen/x86, which is to acquire a per-vector lock
   very early in the EE handling path (see do_external).

   I guess my real question is: will Xen/PPC ever in the future run its
   IPI remote function callbacks with EE enabled?  If the plan is to
   keep things the way they are now, then we should remove the
   per-vector lock entirely.

The following is a patch that implements the above two conclusions and
which allows 'C-aC-aC-at' to work properly.  Comments?
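
For context, the shape of the rendezvous read_clocks performs (a sketch
reconstructed from the identifiers in the keyhandler.c diff below, not
the verbatim Xen code):

    /* Slave side, run inside the IPI callback on every other CPU:
       sample the local clock and report in by clearing our bit. */
    static void read_clocks_slave(void *unused)
    {
        unsigned int cpu = smp_processor_id();
        read_clocks_time[cpu] = NOW();
        cpu_clear(cpu, read_clocks_cpumask);
    }

    /* Initiator, after sending the IPIs: wait for the mask to drain.
       This terminates only if the callbacks actually get to run and
       the compiler re-reads the mask on every iteration -- hence the
       volatile qualifier added below. */
    while (!cpus_empty(read_clocks_cpumask))
        cpu_relax();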

---

 arch/powerpc/external.c |2 --
 common/keyhandler.c |2 +-
 include/xen/cpumask.h   |2 +-
 3 files changed, 2 insertions(+), 4 deletions(-)

diff -r 305751a5281e xen/arch/powerpc/external.c
--- a/xen/arch/powerpc/external.c   Wed Nov 22 16:29:25 2006 -0500
+++ b/xen/arch/powerpc/external.c   Tue Nov 28 03:07:10 2006 -0500
@@ -86,11 +86,9 @@ void do_external(struct cpu_user_regs *r
 /* do_IRQ is fundamentally broken for reliable IPI delivery.  */
 irq_desc_t *desc = &irq_desc[vec];
 regs->entry_vector = vec;
-spin_lock(&desc->lock);
 desc->handler->ack(vec);
 desc->action->handler(vector_to_irq(vec), desc->action->dev_id, regs);
 desc->handler->end(vec);
-spin_unlock(&desc->lock);
 } else if (vec != -1) {
 DBG("EE:0x%lx isrc: %d\n", regs->msr, vec);
 regs->entry_vector = vec;
diff -r 305751a5281e xen/common/keyhandler.c
--- a/xen/common/keyhandler.c   Wed Nov 22 16:29:25 2006 -0500
+++ b/xen/common/keyhandler.c   Tue Nov 28 03:06:24 2006 -0500
@@ -193,7 +193,7 @@ static void dump_domains(unsigned char k
 read_unlock(&domlist_lock);
 }
 
-static cpumask_t read_clocks_cpumask = CPU_MASK_NONE;
+static cpumask_t volatile read_clocks_cpumask = CPU_MASK_NONE;
 static s_time_t read_clocks_time[NR_CPUS];
 
 static void read_clocks_slave(void *unused)
diff -r 305751a5281e xen/include/xen/cpumask.h
--- a/xen/include/xen/cpumask.h Wed Nov 22 16:29:25 2006 -0500
+++ b/xen/include/xen/cpumask.h Tue Nov 28 03:06:24 2006 -0500
@@ -177,7 +177,7 @@ static inline int __cpus_subset(const cp
 }
 
 #define cpus_empty(src) __cpus_empty(&(src), NR_CPUS)
-static inline int __cpus_empty(const cpumask_t *srcp, int nbits)
+static inline int __cpus_empty(const cpumask_t volatile *srcp, int nbits)
 {
return bitmap_empty(srcp->bits, nbits);
 }
