On Tue, 28 Jun 2022, Chao Gao wrote:

> Currently, each slot tracks the number of contiguous free slots starting
> from itself. It helps to quickly check if there are enough contiguous
> entries when dealing with an allocation request. But maintaining this
> information can lead to some overhead. Specifically, if a slot is
> allocated/freed, preceding slots may need to be updated as the number
> of contiguous free slots can change. This process may access memory
> scattered across multiple cachelines.
> 
> To reduce the overhead of maintaining the number of contiguous free
> entries, use a global bitmap to track free slots; each bit indicates
> whether a slot is available. The number of contiguous free slots can be
> calculated by counting the number of consecutive 1s in the bitmap.
> 
> Tests show that the average cost of freeing slots drops by 120 cycles
> while the average cost of allocation increases by 20 cycles. Overall,
> 100 cycles are saved from a pair of allocation and freeing.
> 
> Signed-off-by: Chao Gao <chao....@intel.com>

Reviewed-by: Ilpo Järvinen <ilpo.jarvi...@linux.intel.com>

One nit below.

> ---
>  include/linux/swiotlb.h |  6 ++--
>  kernel/dma/swiotlb.c    | 64 ++++++++++++++++++++---------------------
>  2 files changed, 34 insertions(+), 36 deletions(-)
> 
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index 7ed35dd3de6e..c3eab237991a 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -78,8 +78,6 @@ extern enum swiotlb_force swiotlb_force;
>   *           @end. For default swiotlb, this is command line adjustable via
>   *           setup_io_tlb_npages.
>   * @used:    The number of used IO TLB block.
> - * @list:    The free list describing the number of free entries available
> - *           from each index.
>   * @index:   The index to start searching in the next round.
>   * @orig_addr:       The original address corresponding to a mapped entry.
>   * @alloc_size:      Size of the allocated buffer.
> @@ -89,6 +87,8 @@ extern enum swiotlb_force swiotlb_force;
>   * @late_alloc:      %true if allocated using the page allocator
>   * @force_bounce: %true if swiotlb bouncing is forced
>   * @for_alloc:  %true if the pool is used for memory allocation
> + * @bitmap:  The bitmap used to track free entries. 1 in bit X means the slot
> + *           indexed by X is free.
>   */
>  struct io_tlb_mem {
>       phys_addr_t start;
> @@ -105,8 +105,8 @@ struct io_tlb_mem {
>       struct io_tlb_slot {
>               phys_addr_t orig_addr;
>               size_t alloc_size;
> -             unsigned int list;
>       } *slots;
> +     unsigned long *bitmap;
>  };
>  extern struct io_tlb_mem io_tlb_default_mem;
>  
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index cb50f8d38360..d7f68c0af7f5 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -207,7 +207,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem 
> *mem, phys_addr_t start,
>  
>       spin_lock_init(&mem->lock);
>       for (i = 0; i < mem->nslabs; i++) {
> -             mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
> +             __set_bit(i, mem->bitmap);
>               mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
>               mem->slots[i].alloc_size = 0;
>       }
> @@ -274,6 +274,11 @@ void __init swiotlb_init_remap(bool addressing_limit, 
> unsigned int flags,
>               panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
>                     __func__, alloc_size, PAGE_SIZE);
>  
> +     mem->bitmap = memblock_alloc(BITS_TO_BYTES(nslabs), SMP_CACHE_BYTES);
> +     if (!mem->bitmap)
> +             panic("%s: Failed to allocate %lu bytes align=0x%x\n",
> +                   __func__, DIV_ROUND_UP(nslabs, BITS_PER_BYTE), 
> SMP_CACHE_BYTES);
> +
>       swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false);
>  
>       if (flags & SWIOTLB_VERBOSE)
> @@ -337,10 +342,13 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
>                       (PAGE_SIZE << order) >> 20);
>       }
>  
> +     mem->bitmap = bitmap_zalloc(nslabs, GFP_KERNEL);
>       mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
>               get_order(array_size(sizeof(*mem->slots), nslabs)));
> -     if (!mem->slots) {
> +     if (!mem->slots || !mem->bitmap) {
>               free_pages((unsigned long)vstart, order);
> +             bitmap_free(mem->bitmap);
> +             kfree(mem->slots);
>               return -ENOMEM;
>       }
>  
> @@ -498,7 +506,7 @@ static int swiotlb_find_slots(struct device *dev, 
> phys_addr_t orig_addr,
>       unsigned int iotlb_align_mask =
>               dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
>       unsigned int nslots = nr_slots(alloc_size), stride;
> -     unsigned int index, wrap, count = 0, i;
> +     unsigned int index, wrap, i;
>       unsigned int offset = swiotlb_align_offset(dev, orig_addr);
>       unsigned long flags;
>  
> @@ -514,6 +522,9 @@ static int swiotlb_find_slots(struct device *dev, 
> phys_addr_t orig_addr,
>               stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
>       stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
>  
> +     /* slots shouldn't cross one segment */
> +     max_slots = min_t(unsigned long, max_slots, IO_TLB_SEGSIZE);
> +
>       spin_lock_irqsave(&mem->lock, flags);
>       if (unlikely(nslots > mem->nslabs - mem->used))
>               goto not_found;
> @@ -535,8 +546,15 @@ static int swiotlb_find_slots(struct device *dev, 
> phys_addr_t orig_addr,
>               if (!iommu_is_span_boundary(index, nslots,
>                                           nr_slots(tbl_dma_addr),
>                                           max_slots)) {
> -                     if (mem->slots[index].list >= nslots)
> +                     if (find_next_zero_bit(mem->bitmap, index + nslots, 
> index) ==
> +                                     index + nslots)
>                               goto found;
> +             } else {
> +                     /*
> +                      * Remaining slots between current one and the next
> +                      * bounary cannot meet our requirement.

bounary -> boundary


-- 
 i.
_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to