Eli Cohen wrote:
Since Linux may not merge adjacent pages into a single scatter entry through calls to dma_map_sg(), we check the special case of hugetlb pages which are likely to be mapped to contiguous dma addresses and if they are, take advantage of this. This will result in a significantly lower number of MTT segments used for registering hugetlb memory regions.
How about the one below - it fixes bugzilla #1569 (fix mapping for size that is not on page boundary): --- Since Linux may not merge adjacent pages into a single scatter entry through calls to dma_map_sg(), we check the special case of hugetlb pages which are likely to be mapped to contiguous dma addresses and if they are, take advantage of this. This will result in a significantly lower number of MTT segments used for registering hugetlb memory regions. Signed-off-by: Eli Cohen <e...@mellanox.co.il> --- drivers/infiniband/hw/mlx4/mr.c | 81 ++++++++++++++++++++++++++++++++++---- 1 files changed, 72 insertions(+), 9 deletions(-) Index: b/drivers/infiniband/hw/mlx4/mr.c =================================================================== --- a/drivers/infiniband/hw/mlx4/mr.c 2008-11-19 21:32:15.000000000 +0200 +++ b/drivers/infiniband/hw/mlx4/mr.c 2009-03-30 18:29:55.000000000 +0300 @@ -119,6 +119,70 @@ out: return err; } +static int handle_hugetlb_user_mr(struct ib_pd *pd, struct mlx4_ib_mr *mr, + u64 start, u64 virt_addr, int access_flags) +{ +#if defined(CONFIG_HUGETLB_PAGE) && !defined(__powerpc__) && !defined(__ia64__) + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct ib_umem_chunk *chunk; + unsigned dsize; + dma_addr_t daddr; + unsigned cur_size = 0; + dma_addr_t uninitialized_var(cur_addr); + int n; + struct ib_umem *umem = mr->umem; + u64 *arr; + int err = 0; + int i; + int j = 0; + int off = start & (HPAGE_SIZE - 1); + + n = DIV_ROUND_UP(off + umem->length, HPAGE_SIZE); + arr = kmalloc(n * sizeof *arr, GFP_KERNEL); + if (!arr) + return -ENOMEM; + + list_for_each_entry(chunk, &umem->chunk_list, list) + for (i = 0; i < chunk->nmap; ++i) { + daddr = sg_dma_address(&chunk->page_list[i]); + dsize = sg_dma_len(&chunk->page_list[i]); + if (!cur_size) { + cur_addr = daddr; + cur_size = dsize; + } else if (cur_addr + cur_size != daddr) { + err = -EINVAL; + goto out; + } else + cur_size += dsize; + + if (cur_size > HPAGE_SIZE) { + err = -EINVAL; + goto out; + } 
else if (cur_size == HPAGE_SIZE) { + cur_size = 0; + arr[j++] = cur_addr; + } + } + + if (cur_size) { + arr[j++] = cur_addr; + } + + err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, umem->length, + convert_access(access_flags), n, HPAGE_SHIFT, &mr->mmr); + if (err) + goto out; + + err = mlx4_write_mtt(dev->dev, &mr->mmr.mtt, 0, n, arr); + +out: + kfree(arr); + return err; +#else + return -ENOSYS; +#endif +} + struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -140,17 +204,20 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct goto err_free; } - n = ib_umem_page_count(mr->umem); - shift = ilog2(mr->umem->page_size); - - err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, - convert_access(access_flags), n, shift, &mr->mmr); - if (err) - goto err_umem; - - err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem); - if (err) - goto err_mr; + if (!mr->umem->hugetlb || + handle_hugetlb_user_mr(pd, mr, start, virt_addr, access_flags)) { + n = ib_umem_page_count(mr->umem); + shift = ilog2(mr->umem->page_size); + + err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, + convert_access(access_flags), n, shift, &mr->mmr); + if (err) + goto err_umem; + + err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem); + if (err) + goto err_mr; + } err = mlx4_mr_enable(dev->dev, &mr->mmr); if (err) _______________________________________________ ewg mailing list ewg@lists.openfabrics.org http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg