The DMA window size needs to be big enough to span all memory segments'
physical addresses. We do not need multiple levels of IOMMU tables: a
single level already spans ~70TB of physical memory with 16MB hugepages.
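
For illustration only (not part of the patch): a minimal standalone sketch of
the window sizing, mirroring the round-up-to-next-power-of-2 helper added
below. The top address (example_top_addr) and the main() wrapper are made up
for the example; the patch itself derives the value from ms[0].phys_addr +
ms[0].len.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    static inline int
    clz64(uint64_t val)
    {
            return val ? __builtin_clzll(val) : 64;
    }

    static inline uint64_t
    roundup_next_pow2(uint64_t value)
    {
            if (value && !(value & (value - 1)))
                    return value;            /* already a power of 2 */
            if (!clz64(value))
                    return 0;                /* would overflow 64 bits */
            return 1ULL << (64 - clz64(value));
    }

    int main(void)
    {
            /* hypothetical end of the highest memseg (phys_addr + len) */
            uint64_t example_top_addr = 0x240000000ULL;    /* 9 GB */

            /* prints 0x400000000, i.e. a 16 GB DMA window */
            printf("window size = 0x%" PRIx64 "\n",
                   roundup_next_pow2(example_top_addr));
            return 0;
    }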

Signed-off-by: Jonas Pfefferle <j...@zurich.ibm.com>
---
v2:
* round up to next power of 2 without a loop.

lib/librte_eal/linuxapp/eal/eal_vfio.c | 42 +++++++++++++++++++++++++++++++---
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 946df7e..a3f9977 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -722,6 +722,35 @@ vfio_type1_dma_map(int vfio_container_fd)
        return 0;
 }
 
+static inline int
+clz64(uint64_t val)
+{
+       return val ? __builtin_clzll(val) : 64;
+}
+
+static inline bool
+is_power_of_2(uint64_t value)
+{
+       if (!value)
+               return false;
+
+       return !(value & (value - 1));
+}
+
+static inline uint64_t
+roundup_next_pow2(uint64_t value)
+{
+       uint8_t nlz = clz64(value);
+
+       if (is_power_of_2(value))
+               return value;
+
+       if (!nlz)
+               return 0;
+
+       return 1ULL << (64 - nlz);
+}
+
 static int
 vfio_spapr_dma_map(int vfio_container_fd)
 {
@@ -759,10 +788,12 @@ vfio_spapr_dma_map(int vfio_container_fd)
                return -1;
        }
 
-       /* calculate window size based on number of hugepages configured */
-       create.window_size = rte_eal_get_physmem_size();
+       /* physical pages are sorted in descending order, i.e. ms[0].phys_addr is the max */
+       /* create the DMA window from 0 to max(phys_addr + len) */
+       /* sPAPR requires the window size to be a power of 2 */
+       create.window_size = roundup_next_pow2(ms[0].phys_addr + ms[0].len);
        create.page_shift = __builtin_ctzll(ms->hugepage_sz);
-       create.levels = 2;
+       create.levels = 1;
 
        ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
        if (ret) {
@@ -771,6 +802,11 @@ vfio_spapr_dma_map(int vfio_container_fd)
                return -1;
        }
 
+       if (create.start_addr != 0) {
+               RTE_LOG(ERR, EAL, "  DMA window start address != 0\n");
+               return -1;
+       }
+
        /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
        for (i = 0; i < RTE_MAX_MEMSEG; i++) {
                struct vfio_iommu_type1_dma_map dma_map;
-- 
2.7.4
