Re: [PATCH v2 -tip 1/5] x86, MSI: Support multiple MSIs in presence of IRQ remapping

2012-09-04 Thread Alexander Gordeev
On Mon, Sep 03, 2012 at 11:53:39AM -0700, Yinghai Lu wrote:
> On Mon, Sep 3, 2012 at 2:17 AM, Alexander Gordeev  wrote:
> You could rename create_irq_nr to __create_irq_nr and have it take an
> extra count argument.
> 
> Then make create_irq_nr a wrapper for __create_irq_nr(,1,)
> and create_irqs a wrapper for __create_irq_nr(,count,)
> 

Indeed. Will do.

> BTW, in short, how much performance benefit is there for adding 500 lines of code?

Unfortunately, I do not have a short answer here. There are three types
of performance this series deals with - I'll try to summarize:

- devices - 3 SATA HDDs generate roughly one interrupt every 273 us while
  it gets handled in less than 5 us. So there is/could be no increase here;

- the hardware context interrupt handler - its execution time dropped 2.5 times
  (a little bit more, in fact) at the expense of a 1.3 times increase in
  overall interrupt handling time (hardware context + threaded context);

- overall system performance - I *assume* it should increase, because:
  (a) AHCI interrupt handlers keep local interrupts disabled 2.5 times less
  (b) separate AHCI IRQs become subject to IRQ balancing
  (c) threaded handlers are per-device, per-CPU (well, up to irqbalanced)
  and executed with local interrupts enabled;

-- 
Regards,
Alexander Gordeev
agord...@redhat.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 -tip 1/5] x86, MSI: Support multiple MSIs in presence of IRQ remapping

2012-09-03 Thread Yinghai Lu
On Mon, Sep 3, 2012 at 2:17 AM, Alexander Gordeev  wrote:
> The MSI specification has several constraints in comparison with MSI-X,
> the most notable of which is the inability to configure MSIs independently.
> As a result, it is impossible to dispatch interrupts from different
> queues to different CPUs. This largely devalues the support of
> multiple MSIs in SMP systems.
>
> Also, the necessity to allocate a contiguous block of vector numbers for
> devices capable of multiple MSIs might cause considerable pressure on
> the x86 interrupt vector allocator and could lead to fragmentation of the
> interrupt vector space.
>
> This patch overcomes both drawbacks in the presence of IRQ remapping and
> lets devices take advantage of multiple queues and per-IRQ affinity
> assignments.
>
> Signed-off-by: Alexander Gordeev 
> ---
>  arch/x86/kernel/apic/io_apic.c |  166 +--
>  include/linux/irq.h|6 ++
>  kernel/irq/chip.c  |   30 +--
>  kernel/irq/irqdesc.c   |   31 
>  4 files changed, 216 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
> index c265593..5fd2577 100644
> --- a/arch/x86/kernel/apic/io_apic.c
> +++ b/arch/x86/kernel/apic/io_apic.c
> @@ -305,6 +305,11 @@ static int alloc_irq_from(unsigned int from, int node)
> return irq_alloc_desc_from(from, node);
>  }
>
> +static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
> +{
> +   return irq_alloc_descs_from(from, count, node);
> +}
> +
>  static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
>  {
> free_irq_cfg(at, cfg);
> @@ -3039,6 +3044,55 @@ int create_irq(void)
> return irq;
>  }
>
> +unsigned int create_irqs(unsigned int from, unsigned int count, int node)
> +{
> +   struct irq_cfg **cfg;
> +   unsigned long flags;
> +   int irq, i;
> +
> +   if (from < nr_irqs_gsi)
> +   from = nr_irqs_gsi;
> +
> +   cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
> +   if (!cfg)
> +   return 0;
> +
> +   irq = alloc_irqs_from(from, count, node);
> +   if (irq < 0)
> +   goto out_cfgs;
> +
> +   for (i = 0; i < count; i++) {
> +   cfg[i] = alloc_irq_cfg(irq + i, node);
> +   if (!cfg[i])
> +   goto out_irqs;
> +   }
> +
> +   raw_spin_lock_irqsave(&vector_lock, flags);
> +   for (i = 0; i < count; i++)
> +   if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
> +   goto out_vecs;
> +   raw_spin_unlock_irqrestore(&vector_lock, flags);
> +
> +   for (i = 0; i < count; i++) {
> +   irq_set_chip_data(irq + i, cfg[i]);
> +   irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
> +   }
> +
> +   kfree(cfg);
> +   return irq;
> +
> +out_vecs:
> +   for (; i; i--)
> +   __clear_irq_vector(irq + i - 1, cfg[i - 1]);
> +   raw_spin_unlock_irqrestore(&vector_lock, flags);
> +out_irqs:
> +   for (i = 0; i < count; i++)
> +   free_irq_at(irq + i, cfg[i]);
> +out_cfgs:
> +   kfree(cfg);
> +   return 0;
> +}
> +

You could rename create_irq_nr to __create_irq_nr and have it take an
extra count argument.

Then make create_irq_nr a wrapper for __create_irq_nr(,1,)
and create_irqs a wrapper for __create_irq_nr(,count,)


BTW, in short, how much performance benefit is there for adding 500 lines of code?

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2 -tip 1/5] x86, MSI: Support multiple MSIs in presence of IRQ remapping

2012-09-03 Thread Alexander Gordeev
The MSI specification has several constraints in comparison with MSI-X,
the most notable of which is the inability to configure MSIs independently.
As a result, it is impossible to dispatch interrupts from different
queues to different CPUs. This largely devalues the support of
multiple MSIs in SMP systems.

Also, the necessity to allocate a contiguous block of vector numbers for
devices capable of multiple MSIs might cause considerable pressure on
the x86 interrupt vector allocator and could lead to fragmentation of the
interrupt vector space.

This patch overcomes both drawbacks in the presence of IRQ remapping and
lets devices take advantage of multiple queues and per-IRQ affinity
assignments.

Signed-off-by: Alexander Gordeev 
---
 arch/x86/kernel/apic/io_apic.c |  166 +--
 include/linux/irq.h            |    6 ++
 kernel/irq/chip.c              |   30 +--
 kernel/irq/irqdesc.c           |   31 
 4 files changed, 216 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index c265593..5fd2577 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -305,6 +305,11 @@ static int alloc_irq_from(unsigned int from, int node)
return irq_alloc_desc_from(from, node);
 }
 
+static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
+{
+   return irq_alloc_descs_from(from, count, node);
+}
+
 static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
 {
free_irq_cfg(at, cfg);
@@ -3039,6 +3044,55 @@ int create_irq(void)
return irq;
 }
 
+unsigned int create_irqs(unsigned int from, unsigned int count, int node)
+{
+   struct irq_cfg **cfg;
+   unsigned long flags;
+   int irq, i;
+
+   if (from < nr_irqs_gsi)
+   from = nr_irqs_gsi;
+
+   cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
+   if (!cfg)
+   return 0;
+
+   irq = alloc_irqs_from(from, count, node);
+   if (irq < 0)
+   goto out_cfgs;
+
+   for (i = 0; i < count; i++) {
+   cfg[i] = alloc_irq_cfg(irq + i, node);
+   if (!cfg[i])
+   goto out_irqs;
+   }
+
+   raw_spin_lock_irqsave(&vector_lock, flags);
+   for (i = 0; i < count; i++)
+   if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
+   goto out_vecs;
+   raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+   for (i = 0; i < count; i++) {
+   irq_set_chip_data(irq + i, cfg[i]);
+   irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
+   }
+
+   kfree(cfg);
+   return irq;
+
+out_vecs:
+   for (; i; i--)
+   __clear_irq_vector(irq + i - 1, cfg[i - 1]);
+   raw_spin_unlock_irqrestore(&vector_lock, flags);
+out_irqs:
+   for (i = 0; i < count; i++)
+   free_irq_at(irq + i, cfg[i]);
+out_cfgs:
+   kfree(cfg);
+   return 0;
+}
+
 void destroy_irq(unsigned int irq)
 {
struct irq_cfg *cfg = irq_get_chip_data(irq);
@@ -3054,6 +3108,27 @@ void destroy_irq(unsigned int irq)
free_irq_at(irq, cfg);
 }
 
+static inline void destroy_irqs(unsigned int irq, unsigned int count)
+{
+   unsigned int i;
+   for (i = 0; i < count; i++)
+   destroy_irq(irq + i);
+}
+
+static inline int
+can_create_pow_of_two_irqs(unsigned int from, unsigned int count)
+{
+   if ((count > 1) && (count % 2))
+   return -EINVAL;
+
+   for (; count; count = count / 2) {
+   if (!irq_can_alloc_irqs(from, count))
+   return count;
+   }
+
+   return -ENOSPC;
+}
+
 /*
  * MSI message composition
  */
@@ -3145,18 +3220,25 @@ static struct irq_chip msi_chip = {
.irq_retrigger  = ioapic_retrigger_irq,
 };
 
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+unsigned int irq_base, unsigned int irq_offset)
 {
struct irq_chip *chip = &msi_chip;
struct msi_msg msg;
+   unsigned int irq = irq_base + irq_offset;
int ret;
 
ret = msi_compose_msg(dev, irq, &msg, -1);
if (ret < 0)
return ret;
 
-   irq_set_msi_desc(irq, msidesc);
-   write_msi_msg(irq, &msg);
+   irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
+
+   /* MSI-X message is written per-IRQ, the offset is always 0.
+* MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+*/
+   if (!irq_offset)
+   write_msi_msg(irq, &msg);
 
if (irq_remapped(irq_get_chip_data(irq))) {
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
@@ -3170,16 +3252,12 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
return 0;
 }
 
-int native_setup_msi_irqs(struct pci_