[tip:x86/irq] x86/irq: Fix fixup_irqs() error handling

2014-04-16 Thread tip-bot for Prarit Bhargava
Commit-ID:  fb24da805729ee4a83efa34015948f7d64da4b28
Gitweb: http://git.kernel.org/tip/fb24da805729ee4a83efa34015948f7d64da4b28
Author: Prarit Bhargava 
AuthorDate: Wed, 2 Apr 2014 08:11:13 -0400
Committer:  Ingo Molnar 
CommitDate: Wed, 16 Apr 2014 13:30:49 +0200

x86/irq: Fix fixup_irqs() error handling

Several patches to fix cpu hotplug and the down'd cpu's irq
relocations have been submitted in the past month or so.  The
patches should resolve the problems with cpu hotplug and irq
relocation, however, there is always a possibility that a bug
still exists.  The big problem with debugging these irq
reassignments is that the cpu down completes and then we get
random stack traces from drivers for which irqs have not been
properly assigned to a new cpu.  The stack traces are a mix of
storage, network, and other kernel subsystem (I once saw the
serial port stop working ...) warnings and failures.

The problem with these failures is that they are difficult to
diagnose. There is no warning in the cpu hotplug down path to
indicate that an IRQ has failed to be assigned to a new cpu, and
all we are left with is a stack trace from a driver, or a
non-functional device.  If we had some information on the
console, debugging these situations would be much easier; after
all we can map an IRQ to a device by simply using lspci or
/proc/interrupts.

The current code, fixup_irqs(), which migrates IRQs from the
down'd cpu and is called close to the end of the cpu down path,
calls chip->set_irq_affinity which eventually calls
__assign_irq_vector(). Errors are not propagated back from this
function call and this results in silent irq relocation
failures.

This patch fixes this issue by returning the error codes up the
call stack and prints out a warning if there is a relocation
failure.

Signed-off-by: Prarit Bhargava 
Acked-by: Thomas Gleixner 
Cc: Rui Wang 
Cc: Liu Ping Fan 
Cc: Bjorn Helgaas 
Cc: Yoshihiro YUNOMAE 
Cc: Lv Zheng 
Cc: Seiji Aguchi 
Cc: Yang Zhang 
Cc: Andi Kleen 
Cc: Steven Rostedt (Red Hat) 
Cc: Li Fei 
Cc: gong.c...@linux.intel.com
Link: 
http://lkml.kernel.org/r/1396440673-18286-1-git-send-email-pra...@redhat.com
[ Made small cleanliness tweaks. ]
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/apic/io_apic.c | 28 ++--
 arch/x86/kernel/irq.c  | 13 +
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6ad4658..b4b21db 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2312,7 +2312,7 @@ int __ioapic_set_affinity(struct irq_data *data, const 
struct cpumask *mask,
int err;
 
if (!config_enabled(CONFIG_SMP))
-   return -1;
+   return -EPERM;
 
if (!cpumask_intersects(mask, cpu_online_mask))
return -EINVAL;
@@ -2343,7 +2343,7 @@ int native_ioapic_set_affinity(struct irq_data *data,
int ret;
 
if (!config_enabled(CONFIG_SMP))
-   return -1;
+   return -EPERM;
 
raw_spin_lock_irqsave(_lock, flags);
ret = __ioapic_set_affinity(data, mask, );
@@ -3075,9 +3075,11 @@ msi_set_affinity(struct irq_data *data, const struct 
cpumask *mask, bool force)
struct irq_cfg *cfg = data->chip_data;
struct msi_msg msg;
unsigned int dest;
+   int ret;
 
-   if (__ioapic_set_affinity(data, mask, ))
-   return -1;
+   ret = __ioapic_set_affinity(data, mask, );
+   if (ret)
+   return ret;
 
__get_cached_msi_msg(data->msi_desc, );
 
@@ -3177,9 +3179,11 @@ dmar_msi_set_affinity(struct irq_data *data, const 
struct cpumask *mask,
struct irq_cfg *cfg = data->chip_data;
unsigned int dest, irq = data->irq;
struct msi_msg msg;
+   int ret;
 
-   if (__ioapic_set_affinity(data, mask, ))
-   return -1;
+   ret = __ioapic_set_affinity(data, mask, );
+   if (ret)
+   return ret;
 
dmar_msi_read(irq, );
 
@@ -3226,9 +3230,11 @@ static int hpet_msi_set_affinity(struct irq_data *data,
struct irq_cfg *cfg = data->chip_data;
struct msi_msg msg;
unsigned int dest;
+   int ret;
 
-   if (__ioapic_set_affinity(data, mask, ))
-   return -1;
+   ret = __ioapic_set_affinity(data, mask, );
+   if (ret)
+   return ret;
 
hpet_msi_read(data->handler_data, );
 
@@ -3295,9 +3301,11 @@ ht_set_affinity(struct irq_data *data, const struct 
cpumask *mask, bool force)
 {
struct irq_cfg *cfg = data->chip_data;
unsigned int dest;
+   int ret;
 
-   if (__ioapic_set_affinity(data, mask, ))
-   return -1;
+   ret = __ioapic_set_affinity(data, mask, );
+   if (ret)
+   return ret;
 
target_ht_irq(data->irq, dest, cfg->vector);
return IRQ_SET_MASK_OK_NOCOPY;
diff --git a/arch/x86/kernel/irq.c 

[tip:x86/irq] x86/irq: Fix fixup_irqs() error handling

2014-04-16 Thread tip-bot for Prarit Bhargava
Commit-ID:  fb24da805729ee4a83efa34015948f7d64da4b28
Gitweb: http://git.kernel.org/tip/fb24da805729ee4a83efa34015948f7d64da4b28
Author: Prarit Bhargava pra...@redhat.com
AuthorDate: Wed, 2 Apr 2014 08:11:13 -0400
Committer:  Ingo Molnar mi...@kernel.org
CommitDate: Wed, 16 Apr 2014 13:30:49 +0200

x86/irq: Fix fixup_irqs() error handling

Several patches to fix cpu hotplug and the down'd cpu's irq
relocations have been submitted in the past month or so.  The
patches should resolve the problems with cpu hotplug and irq
relocation, however, there is always a possibility that a bug
still exists.  The big problem with debugging these irq
reassignments is that the cpu down completes and then we get
random stack traces from drivers for which irqs have not been
properly assigned to a new cpu.  The stack traces are a mix of
storage, network, and other kernel subsystem (I once saw the
serial port stop working ...) warnings and failures.

The problem with these failures is that they are difficult to
diagnose. There is no warning in the cpu hotplug down path to
indicate that an IRQ has failed to be assigned to a new cpu, and
all we are left with is a stack trace from a driver, or a
non-functional device.  If we had some information on the
console, debugging these situations would be much easier; after
all we can map an IRQ to a device by simply using lspci or
/proc/interrupts.

The current code, fixup_irqs(), which migrates IRQs from the
down'd cpu and is called close to the end of the cpu down path,
calls chip->set_irq_affinity which eventually calls
__assign_irq_vector(). Errors are not propagated back from this
function call and this results in silent irq relocation
failures.

This patch fixes this issue by returning the error codes up the
call stack and prints out a warning if there is a relocation
failure.

Signed-off-by: Prarit Bhargava pra...@redhat.com
Acked-by: Thomas Gleixner t...@linutronix.de
Cc: Rui Wang rui.y.w...@intel.com
Cc: Liu Ping Fan kernelf...@gmail.com
Cc: Bjorn Helgaas bhelg...@google.com
Cc: Yoshihiro YUNOMAE yoshihiro.yunomae...@hitachi.com
Cc: Lv Zheng lv.zh...@intel.com
Cc: Seiji Aguchi seiji.agu...@hds.com
Cc: Yang Zhang yang.z.zh...@intel.com
Cc: Andi Kleen a...@linux.intel.com
Cc: Steven Rostedt (Red Hat) rost...@goodmis.org
Cc: Li Fei fei...@intel.com
Cc: gong.c...@linux.intel.com
Link: 
http://lkml.kernel.org/r/1396440673-18286-1-git-send-email-pra...@redhat.com
[ Made small cleanliness tweaks. ]
Signed-off-by: Ingo Molnar mi...@kernel.org
---
 arch/x86/kernel/apic/io_apic.c | 28 ++--
 arch/x86/kernel/irq.c  | 13 +
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6ad4658..b4b21db 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2312,7 +2312,7 @@ int __ioapic_set_affinity(struct irq_data *data, const 
struct cpumask *mask,
int err;
 
if (!config_enabled(CONFIG_SMP))
-   return -1;
+   return -EPERM;
 
if (!cpumask_intersects(mask, cpu_online_mask))
return -EINVAL;
@@ -2343,7 +2343,7 @@ int native_ioapic_set_affinity(struct irq_data *data,
int ret;
 
if (!config_enabled(CONFIG_SMP))
-   return -1;
+   return -EPERM;
 
raw_spin_lock_irqsave(ioapic_lock, flags);
ret = __ioapic_set_affinity(data, mask, dest);
@@ -3075,9 +3075,11 @@ msi_set_affinity(struct irq_data *data, const struct 
cpumask *mask, bool force)
struct irq_cfg *cfg = data-chip_data;
struct msi_msg msg;
unsigned int dest;
+   int ret;
 
-   if (__ioapic_set_affinity(data, mask, dest))
-   return -1;
+   ret = __ioapic_set_affinity(data, mask, dest);
+   if (ret)
+   return ret;
 
__get_cached_msi_msg(data-msi_desc, msg);
 
@@ -3177,9 +3179,11 @@ dmar_msi_set_affinity(struct irq_data *data, const 
struct cpumask *mask,
struct irq_cfg *cfg = data-chip_data;
unsigned int dest, irq = data-irq;
struct msi_msg msg;
+   int ret;
 
-   if (__ioapic_set_affinity(data, mask, dest))
-   return -1;
+   ret = __ioapic_set_affinity(data, mask, dest);
+   if (ret)
+   return ret;
 
dmar_msi_read(irq, msg);
 
@@ -3226,9 +3230,11 @@ static int hpet_msi_set_affinity(struct irq_data *data,
struct irq_cfg *cfg = data-chip_data;
struct msi_msg msg;
unsigned int dest;
+   int ret;
 
-   if (__ioapic_set_affinity(data, mask, dest))
-   return -1;
+   ret = __ioapic_set_affinity(data, mask, dest);
+   if (ret)
+   return ret;
 
hpet_msi_read(data-handler_data, msg);
 
@@ -3295,9 +3301,11 @@ ht_set_affinity(struct irq_data *data, const struct 
cpumask *mask, bool force)
 {
struct irq_cfg *cfg = data-chip_data;
unsigned int dest;
+