Re: [PATCH v2] irqchip: gicv3-its: Don't assume GICv3 hardware supports 16bit INTID

2017-06-22 Thread Marc Zyngier
On 22/06/17 18:44, Shanker Donthineni wrote:
> The current ITS driver is assuming every ITS hardware implementation
> supports minimum of 16bit INTID. But this is not true, as per GICv3
> specification, INTID field is IMPLEMENTATION DEFINED in the range of
> 14-24 bits. We might see an unpredictable system behavior on systems
> where hardware support less than 16bits and software tries to use
> 64K LPI interrupts.
> 
> On Qualcomm Datacenter Technologies QDF2400 platform, boot log shows
> confusing information about number of LPI chunks as shown below. The
> QDF2400 ITS hardware supports 24bit INTID.
> 
> This patch allocates the memory resources for PEND/PROP tables based
> on discoverable value which is specified in GITS_TYPER.IDbits. Also
> taking this opportunity to increase number of LPI/MSI(x) to 128K if
> the hardware is capable, and show log message that reflects the
> correct number of LPI chunks.
> 
> ITS@0xff7efe: allocated 524288 Devices @3c040 (indirect, esz 8, psz 
> 64K, shr 1)
> ITS@0xff7efe: allocated 8192 Interrupt Collections @3c013 (flat, esz 
> 8, psz 64K, shr 1)
> ITS@0xff7efe: allocated 8192 Virtual CPUs @3c014 (flat, esz 8, psz 
> 64K, shr 1)
> ITS: Allocated 524032 chunks for LPIs
> PCI/MSI: ITS@0xff7efe domain created
> Platform MSI: ITS@0xff7efe domain created

I seriously doubt that anyone will ever see a shortage of LPIs with 
16bit IDs ("64k interrupts should be enough for everybody"). But if you 
think otherwise, fair enough. Comments on the actual patch:

> 
> Signed-off-by: Shanker Donthineni 
> ---
> Changes since v1:
>No code changes, just rebase on tip of the Marc's branch and tested on 
> QDF2400 platform.
>
> https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/irqchip-4.13
> 
>  drivers/irqchip/irq-gic-v3-its.c | 34 --
>  1 file changed, 16 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
> b/drivers/irqchip/irq-gic-v3-its.c
> index fee7d13..6000c56 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -691,9 +691,11 @@ static void its_irq_compose_msi_msg(struct irq_data *d, 
> struct msi_msg *msg)
>   */
>  #define IRQS_PER_CHUNK_SHIFT 5
>  #define IRQS_PER_CHUNK   (1 << IRQS_PER_CHUNK_SHIFT)
> +#define ITS_MAX_LPI_NRBITS   (17) /* 128K LPIs */
>  
>  static unsigned long *lpi_bitmap;
>  static u32 lpi_chunks;
> +static u32 lpi_nrbits;
>  static DEFINE_SPINLOCK(lpi_lock);
>  
>  static int its_lpi_to_chunk(int lpi)
> @@ -789,26 +791,19 @@ static void its_lpi_free(struct event_lpi_map *map)
>  }
>  
>  /*
> - * We allocate 64kB for PROPBASE. That gives us at most 64K LPIs to
> + * We allocate memory for PROPBASE to cover 2 ^ lpi_nrbits LPIs to
>   * deal with (one configuration byte per interrupt). PENDBASE has to
>   * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
>   */
> -#define LPI_PROPBASE_SZ  SZ_64K
> -#define LPI_PENDBASE_SZ  (LPI_PROPBASE_SZ / 8 + SZ_1K)

Why don't you keep the same macros and update them to deal with a
variable? It would make the patch so much easier to review.

Something along the lines of (untested):

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 63cd0f2b8707..a1891840c66a 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -691,8 +691,10 @@ static struct irq_chip its_irq_chip = {
  */
 #define IRQS_PER_CHUNK_SHIFT   5
 #define IRQS_PER_CHUNK (1 << IRQS_PER_CHUNK_SHIFT)
+#define ITS_MAX_LPI_NRBITS (17) /* 128K LPIs */
 
 static unsigned long *lpi_bitmap;
+static u32 lpi_nrbits;
 static u32 lpi_chunks;
 static DEFINE_SPINLOCK(lpi_lock);
 
@@ -706,9 +708,9 @@ static int its_chunk_to_lpi(int chunk)
return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192;
 }
 
-static int __init its_lpi_init(u32 id_bits)
+static int __init its_lpi_init(void)
 {
-   lpi_chunks = its_lpi_to_chunk(1UL << id_bits);
+   lpi_chunks = its_lpi_to_chunk(1UL << lpi_nrbits);
 
lpi_bitmap = kzalloc(BITS_TO_LONGS(lpi_chunks) * sizeof(long),
 GFP_KERNEL);
@@ -793,13 +795,9 @@ static void its_lpi_free(struct event_lpi_map *map)
  * deal with (one configuration byte per interrupt). PENDBASE has to
  * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
  */
-#define LPI_PROPBASE_SZSZ_64K
-#define LPI_PENDBASE_SZ(LPI_PROPBASE_SZ / 8 + SZ_1K)
-
-/*
- * This is how many bits of ID we need, including the useless ones.
- */
-#define LPI_NRBITS ilog2(LPI_PROPBASE_SZ + SZ_8K)
+#define LPI_PROPBASE_SZALIGN(BIT(lpi_nrbits), SZ_64K)
+#define LPI_PENDBASE_SZALIGN(BIT(lpi_nrbits) / 8, SZ_64K)
+#define LPI_NRBITS lpi_nrbits
 
 #define LPI_PROP_DEFAULT_PRIO  0xa0
 
@@ -807,6 +805,7 @@ static int __init 

Re: [PATCH v2] irqchip: gicv3-its: Don't assume GICv3 hardware supports 16bit INTID

2017-06-22 Thread Marc Zyngier
On 22/06/17 18:44, Shanker Donthineni wrote:
> The current ITS driver is assuming every ITS hardware implementation
> supports minimum of 16bit INTID. But this is not true, as per GICv3
> specification, INTID field is IMPLEMENTATION DEFINED in the range of
> 14-24 bits. We might see an unpredictable system behavior on systems
> where hardware support less than 16bits and software tries to use
> 64K LPI interrupts.
> 
> On Qualcomm Datacenter Technologies QDF2400 platform, boot log shows
> confusing information about number of LPI chunks as shown below. The
> QDF2400 ITS hardware supports 24bit INTID.
> 
> This patch allocates the memory resources for PEND/PROP tables based
> on discoverable value which is specified in GITS_TYPER.IDbits. Also
> taking this opportunity to increase number of LPI/MSI(x) to 128K if
> the hardware is capable, and show log message that reflects the
> correct number of LPI chunks.
> 
> ITS@0xff7efe: allocated 524288 Devices @3c040 (indirect, esz 8, psz 
> 64K, shr 1)
> ITS@0xff7efe: allocated 8192 Interrupt Collections @3c013 (flat, esz 
> 8, psz 64K, shr 1)
> ITS@0xff7efe: allocated 8192 Virtual CPUs @3c014 (flat, esz 8, psz 
> 64K, shr 1)
> ITS: Allocated 524032 chunks for LPIs
> PCI/MSI: ITS@0xff7efe domain created
> Platform MSI: ITS@0xff7efe domain created

I seriously doubt that anyone will ever see a shortage of LPIs with 
16bit IDs ("64k interrupts should be enough for everybody"). But if you 
think otherwise, fair enough. Comments on the actual patch:

> 
> Signed-off-by: Shanker Donthineni 
> ---
> Changes since v1:
>No code changes, just rebase on tip of the Marc's branch and tested on 
> QDF2400 platform.
>
> https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/irqchip-4.13
> 
>  drivers/irqchip/irq-gic-v3-its.c | 34 --
>  1 file changed, 16 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
> b/drivers/irqchip/irq-gic-v3-its.c
> index fee7d13..6000c56 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -691,9 +691,11 @@ static void its_irq_compose_msi_msg(struct irq_data *d, 
> struct msi_msg *msg)
>   */
>  #define IRQS_PER_CHUNK_SHIFT 5
>  #define IRQS_PER_CHUNK   (1 << IRQS_PER_CHUNK_SHIFT)
> +#define ITS_MAX_LPI_NRBITS   (17) /* 128K LPIs */
>  
>  static unsigned long *lpi_bitmap;
>  static u32 lpi_chunks;
> +static u32 lpi_nrbits;
>  static DEFINE_SPINLOCK(lpi_lock);
>  
>  static int its_lpi_to_chunk(int lpi)
> @@ -789,26 +791,19 @@ static void its_lpi_free(struct event_lpi_map *map)
>  }
>  
>  /*
> - * We allocate 64kB for PROPBASE. That gives us at most 64K LPIs to
> + * We allocate memory for PROPBASE to cover 2 ^ lpi_nrbits LPIs to
>   * deal with (one configuration byte per interrupt). PENDBASE has to
>   * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
>   */
> -#define LPI_PROPBASE_SZ  SZ_64K
> -#define LPI_PENDBASE_SZ  (LPI_PROPBASE_SZ / 8 + SZ_1K)

Why don't you keep the same macros and update them to deal with a
variable? It would make the patch so much easier to review.

Something along the lines of (untested):

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 63cd0f2b8707..a1891840c66a 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -691,8 +691,10 @@ static struct irq_chip its_irq_chip = {
  */
 #define IRQS_PER_CHUNK_SHIFT   5
 #define IRQS_PER_CHUNK (1 << IRQS_PER_CHUNK_SHIFT)
+#define ITS_MAX_LPI_NRBITS (17) /* 128K LPIs */
 
 static unsigned long *lpi_bitmap;
+static u32 lpi_nrbits;
 static u32 lpi_chunks;
 static DEFINE_SPINLOCK(lpi_lock);
 
@@ -706,9 +708,9 @@ static int its_chunk_to_lpi(int chunk)
return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192;
 }
 
-static int __init its_lpi_init(u32 id_bits)
+static int __init its_lpi_init(void)
 {
-   lpi_chunks = its_lpi_to_chunk(1UL << id_bits);
+   lpi_chunks = its_lpi_to_chunk(1UL << lpi_nrbits);
 
lpi_bitmap = kzalloc(BITS_TO_LONGS(lpi_chunks) * sizeof(long),
 GFP_KERNEL);
@@ -793,13 +795,9 @@ static void its_lpi_free(struct event_lpi_map *map)
  * deal with (one configuration byte per interrupt). PENDBASE has to
  * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
  */
-#define LPI_PROPBASE_SZSZ_64K
-#define LPI_PENDBASE_SZ(LPI_PROPBASE_SZ / 8 + SZ_1K)
-
-/*
- * This is how many bits of ID we need, including the useless ones.
- */
-#define LPI_NRBITS ilog2(LPI_PROPBASE_SZ + SZ_8K)
+#define LPI_PROPBASE_SZALIGN(BIT(lpi_nrbits), SZ_64K)
+#define LPI_PENDBASE_SZALIGN(BIT(lpi_nrbits) / 8, SZ_64K)
+#define LPI_NRBITS lpi_nrbits
 
 #define LPI_PROP_DEFAULT_PRIO  0xa0
 
@@ -807,6 +805,7 @@ static int __init its_alloc_lpi_tables(void)

[PATCH v2] irqchip: gicv3-its: Don't assume GICv3 hardware supports 16bit INTID

2017-06-22 Thread Shanker Donthineni
The current ITS driver is assuming every ITS hardware implementation
supports minimum of 16bit INTID. But this is not true, as per GICv3
specification, INTID field is IMPLEMENTATION DEFINED in the range of
14-24 bits. We might see an unpredictable system behavior on systems
where hardware support less than 16bits and software tries to use
64K LPI interrupts.

On Qualcomm Datacenter Technologies QDF2400 platform, boot log shows
confusing information about number of LPI chunks as shown below. The
QDF2400 ITS hardware supports 24bit INTID.

This patch allocates the memory resources for PEND/PROP tables based
on discoverable value which is specified in GITS_TYPER.IDbits. Also
taking this opportunity to increase number of LPI/MSI(x) to 128K if
the hardware is capable, and show log message that reflects the
correct number of LPI chunks.

ITS@0xff7efe: allocated 524288 Devices @3c040 (indirect, esz 8, psz 
64K, shr 1)
ITS@0xff7efe: allocated 8192 Interrupt Collections @3c013 (flat, esz 8, 
psz 64K, shr 1)
ITS@0xff7efe: allocated 8192 Virtual CPUs @3c014 (flat, esz 8, psz 64K, 
shr 1)
ITS: Allocated 524032 chunks for LPIs
PCI/MSI: ITS@0xff7efe domain created
Platform MSI: ITS@0xff7efe domain created

Signed-off-by: Shanker Donthineni 
---
Changes since v1:
   No code changes, just rebase on tip of the Marc's branch and tested on 
QDF2400 platform.
   
https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/irqchip-4.13

 drivers/irqchip/irq-gic-v3-its.c | 34 --
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index fee7d13..6000c56 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -691,9 +691,11 @@ static void its_irq_compose_msi_msg(struct irq_data *d, 
struct msi_msg *msg)
  */
 #define IRQS_PER_CHUNK_SHIFT   5
 #define IRQS_PER_CHUNK (1 << IRQS_PER_CHUNK_SHIFT)
+#define ITS_MAX_LPI_NRBITS (17) /* 128K LPIs */
 
 static unsigned long *lpi_bitmap;
 static u32 lpi_chunks;
+static u32 lpi_nrbits;
 static DEFINE_SPINLOCK(lpi_lock);
 
 static int its_lpi_to_chunk(int lpi)
@@ -789,26 +791,19 @@ static void its_lpi_free(struct event_lpi_map *map)
 }
 
 /*
- * We allocate 64kB for PROPBASE. That gives us at most 64K LPIs to
+ * We allocate memory for PROPBASE to cover 2 ^ lpi_nrbits LPIs to
  * deal with (one configuration byte per interrupt). PENDBASE has to
  * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
  */
-#define LPI_PROPBASE_SZSZ_64K
-#define LPI_PENDBASE_SZ(LPI_PROPBASE_SZ / 8 + SZ_1K)
-
-/*
- * This is how many bits of ID we need, including the useless ones.
- */
-#define LPI_NRBITS ilog2(LPI_PROPBASE_SZ + SZ_8K)
 
 #define LPI_PROP_DEFAULT_PRIO  0xa0
 
 static int __init its_alloc_lpi_tables(void)
 {
+   u32 size = ALIGN(BIT(lpi_nrbits), SZ_64K);
phys_addr_t paddr;
 
-   gic_rdists->prop_page = alloc_pages(GFP_NOWAIT,
-  get_order(LPI_PROPBASE_SZ));
+   gic_rdists->prop_page = alloc_pages(GFP_NOWAIT, get_order(size));
if (!gic_rdists->prop_page) {
pr_err("Failed to allocate PROPBASE\n");
return -ENOMEM;
@@ -820,10 +815,10 @@ static int __init its_alloc_lpi_tables(void)
/* Priority 0xa0, Group-1, disabled */
memset(page_address(gic_rdists->prop_page),
   LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1,
-  LPI_PROPBASE_SZ);
+  size);
 
/* Make sure the GIC will observe the written configuration */
-   gic_flush_dcache_to_poc(page_address(gic_rdists->prop_page), 
LPI_PROPBASE_SZ);
+   gic_flush_dcache_to_poc(page_address(gic_rdists->prop_page), size);
 
return 0;
 }
@@ -1095,12 +1090,14 @@ static void its_cpu_init_lpis(void)
pend_page = gic_data_rdist()->pend_page;
if (!pend_page) {
phys_addr_t paddr;
+   u32 size;
/*
-* The pending pages have to be at least 64kB aligned,
-* hence the 'max(LPI_PENDBASE_SZ, SZ_64K)' below.
+* The pending pages have to be at least 64kB aligned
+* hence the 'ALIGN(BIT(lpi_nrbits)/8, SZ_64K)' below.
 */
+   size = ALIGN(BIT(lpi_nrbits)/8, SZ_64K);
pend_page = alloc_pages(GFP_NOWAIT | __GFP_ZERO,
-   get_order(max(LPI_PENDBASE_SZ, 
SZ_64K)));
+   get_order(size));
if (!pend_page) {
pr_err("Failed to allocate PENDBASE for CPU%d\n",
   smp_processor_id());
@@ -1108,7 +1105,7 @@ static void its_cpu_init_lpis(void)
}
 
/* Make sure the GIC will observe the zero-ed page */
-   

[PATCH v2] irqchip: gicv3-its: Don't assume GICv3 hardware supports 16bit INTID

2017-06-22 Thread Shanker Donthineni
The current ITS driver is assuming every ITS hardware implementation
supports minimum of 16bit INTID. But this is not true, as per GICv3
specification, INTID field is IMPLEMENTATION DEFINED in the range of
14-24 bits. We might see an unpredictable system behavior on systems
where hardware support less than 16bits and software tries to use
64K LPI interrupts.

On Qualcomm Datacenter Technologies QDF2400 platform, boot log shows
confusing information about number of LPI chunks as shown below. The
QDF2400 ITS hardware supports 24bit INTID.

This patch allocates the memory resources for PEND/PROP tables based
on discoverable value which is specified in GITS_TYPER.IDbits. Also
taking this opportunity to increase number of LPI/MSI(x) to 128K if
the hardware is capable, and show log message that reflects the
correct number of LPI chunks.

ITS@0xff7efe: allocated 524288 Devices @3c040 (indirect, esz 8, psz 
64K, shr 1)
ITS@0xff7efe: allocated 8192 Interrupt Collections @3c013 (flat, esz 8, 
psz 64K, shr 1)
ITS@0xff7efe: allocated 8192 Virtual CPUs @3c014 (flat, esz 8, psz 64K, 
shr 1)
ITS: Allocated 524032 chunks for LPIs
PCI/MSI: ITS@0xff7efe domain created
Platform MSI: ITS@0xff7efe domain created

Signed-off-by: Shanker Donthineni 
---
Changes since v1:
   No code changes, just rebase on tip of the Marc's branch and tested on 
QDF2400 platform.
   
https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/irqchip-4.13

 drivers/irqchip/irq-gic-v3-its.c | 34 --
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index fee7d13..6000c56 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -691,9 +691,11 @@ static void its_irq_compose_msi_msg(struct irq_data *d, 
struct msi_msg *msg)
  */
 #define IRQS_PER_CHUNK_SHIFT   5
 #define IRQS_PER_CHUNK (1 << IRQS_PER_CHUNK_SHIFT)
+#define ITS_MAX_LPI_NRBITS (17) /* 128K LPIs */
 
 static unsigned long *lpi_bitmap;
 static u32 lpi_chunks;
+static u32 lpi_nrbits;
 static DEFINE_SPINLOCK(lpi_lock);
 
 static int its_lpi_to_chunk(int lpi)
@@ -789,26 +791,19 @@ static void its_lpi_free(struct event_lpi_map *map)
 }
 
 /*
- * We allocate 64kB for PROPBASE. That gives us at most 64K LPIs to
+ * We allocate memory for PROPBASE to cover 2 ^ lpi_nrbits LPIs to
  * deal with (one configuration byte per interrupt). PENDBASE has to
  * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
  */
-#define LPI_PROPBASE_SZSZ_64K
-#define LPI_PENDBASE_SZ(LPI_PROPBASE_SZ / 8 + SZ_1K)
-
-/*
- * This is how many bits of ID we need, including the useless ones.
- */
-#define LPI_NRBITS ilog2(LPI_PROPBASE_SZ + SZ_8K)
 
 #define LPI_PROP_DEFAULT_PRIO  0xa0
 
 static int __init its_alloc_lpi_tables(void)
 {
+   u32 size = ALIGN(BIT(lpi_nrbits), SZ_64K);
phys_addr_t paddr;
 
-   gic_rdists->prop_page = alloc_pages(GFP_NOWAIT,
-  get_order(LPI_PROPBASE_SZ));
+   gic_rdists->prop_page = alloc_pages(GFP_NOWAIT, get_order(size));
if (!gic_rdists->prop_page) {
pr_err("Failed to allocate PROPBASE\n");
return -ENOMEM;
@@ -820,10 +815,10 @@ static int __init its_alloc_lpi_tables(void)
/* Priority 0xa0, Group-1, disabled */
memset(page_address(gic_rdists->prop_page),
   LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1,
-  LPI_PROPBASE_SZ);
+  size);
 
/* Make sure the GIC will observe the written configuration */
-   gic_flush_dcache_to_poc(page_address(gic_rdists->prop_page), 
LPI_PROPBASE_SZ);
+   gic_flush_dcache_to_poc(page_address(gic_rdists->prop_page), size);
 
return 0;
 }
@@ -1095,12 +1090,14 @@ static void its_cpu_init_lpis(void)
pend_page = gic_data_rdist()->pend_page;
if (!pend_page) {
phys_addr_t paddr;
+   u32 size;
/*
-* The pending pages have to be at least 64kB aligned,
-* hence the 'max(LPI_PENDBASE_SZ, SZ_64K)' below.
+* The pending pages have to be at least 64kB aligned
+* hence the 'ALIGN(BIT(lpi_nrbits)/8, SZ_64K)' below.
 */
+   size = ALIGN(BIT(lpi_nrbits)/8, SZ_64K);
pend_page = alloc_pages(GFP_NOWAIT | __GFP_ZERO,
-   get_order(max(LPI_PENDBASE_SZ, 
SZ_64K)));
+   get_order(size));
if (!pend_page) {
pr_err("Failed to allocate PENDBASE for CPU%d\n",
   smp_processor_id());
@@ -1108,7 +1105,7 @@ static void its_cpu_init_lpis(void)
}
 
/* Make sure the GIC will observe the zero-ed page */
-