The current implementation allocates a single host page for EQ context memory. Because the number of required EQs grows with the number of CPU cores, a single page may not be sufficient on systems with many cores. This patch removes the hard-coded limit and makes the allocation depend on the EQ context entry size and the number of required EQs.
Signed-off-by: Eli Cohen <e...@mellanox.co.il> --- drivers/net/mlx4/eq.c | 42 ++++++++++++++++++++++++------------------ drivers/net/mlx4/main.c | 1 + drivers/net/mlx4/mlx4.h | 1 + include/linux/mlx4/device.h | 1 + 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index b9ceddd..1d41b1a 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -530,29 +530,34 @@ int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt) { struct mlx4_priv *priv = mlx4_priv(dev); int ret; + int host_pages, icm_pages; + int i; - /* - * We assume that mapping one page is enough for the whole EQ - * context table. This is fine with all current HCAs, because - * we only use 32 EQs and each EQ uses 64 bytes of context - * memory, or 1 KB total. - */ + host_pages = ALIGN(min_t(int, dev->caps.num_eqs, num_possible_cpus() + 1) * + dev->caps.eqc_entry_size, PAGE_SIZE) >> PAGE_SHIFT; + priv->eq_table.order = ilog2(roundup_pow_of_two(host_pages)); priv->eq_table.icm_virt = icm_virt; - priv->eq_table.icm_page = alloc_page(GFP_HIGHUSER); + priv->eq_table.icm_page = alloc_pages(GFP_HIGHUSER, priv->eq_table.order); if (!priv->eq_table.icm_page) return -ENOMEM; priv->eq_table.icm_dma = pci_map_page(dev->pdev, priv->eq_table.icm_page, 0, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + PAGE_SIZE << priv->eq_table.order, + PCI_DMA_BIDIRECTIONAL); if (pci_dma_mapping_error(dev->pdev, priv->eq_table.icm_dma)) { - __free_page(priv->eq_table.icm_page); + __free_pages(priv->eq_table.icm_page, priv->eq_table.order); return -ENOMEM; } - ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt); - if (ret) { - pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - __free_page(priv->eq_table.icm_page); + icm_pages = (PAGE_SIZE / MLX4_ICM_PAGE_SIZE) * (1 << priv->eq_table.order); + for (i = 0; i < icm_pages; ++i) { + ret = mlx4_MAP_ICM_page(dev, priv->eq_table.icm_dma, icm_virt + i * MLX4_ICM_PAGE_SIZE); + if (ret) { 
+ mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, i); + pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + __free_pages(priv->eq_table.icm_page, priv->eq_table.order); + break; + } } return ret; @@ -561,11 +566,12 @@ int mlx4_map_eq_icm(struct mlx4_dev *dev, u64 icm_virt) void mlx4_unmap_eq_icm(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); + int icm_pages = (PAGE_SIZE / MLX4_ICM_PAGE_SIZE) * (1 << priv->eq_table.order); - mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, 1); - pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - __free_page(priv->eq_table.icm_page); + mlx4_UNMAP_ICM(dev, priv->eq_table.icm_virt, icm_pages); + pci_unmap_page(dev->pdev, priv->eq_table.icm_dma, + PAGE_SIZE << priv->eq_table.order, PCI_DMA_BIDIRECTIONAL); + __free_pages(priv->eq_table.icm_page, priv->eq_table.order); } int mlx4_alloc_eq_table(struct mlx4_dev *dev) diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index dac621b..872becd 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -207,6 +207,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_cqes = dev_cap->max_cq_sz - 1; dev->caps.reserved_cqs = dev_cap->reserved_cqs; dev->caps.reserved_eqs = dev_cap->reserved_eqs; + dev->caps.eqc_entry_size = dev_cap->eqc_entry_sz; dev->caps.mtts_per_seg = 1 << log_mtts_per_seg; dev->caps.reserved_mtts = DIV_ROUND_UP(dev_cap->reserved_mtts, dev->caps.mtts_per_seg); diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 5bd79c2..1a20fa3 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -211,6 +211,7 @@ struct mlx4_eq_table { struct mlx4_icm_table cmpt_table; int have_irq; u8 inta_pin; + int order; }; struct mlx4_srq_table { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index ce7cc6c..8923c9b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -206,6 
+206,7 @@ struct mlx4_caps { int max_cqes; int reserved_cqs; int num_eqs; + int eqc_entry_size; int reserved_eqs; int num_comp_vectors; int num_mpts; -- 1.6.3.3 _______________________________________________ ewg mailing list ewg@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg