The device coherent memory code uses bitmap helper functions that take an
order of PAGE_SIZE, which means the size of each allocated region is always
rounded up to a power-of-2 number of pages. For example, allocating 33 MB
from a 33 MB dma_mem region fails, since the order-based allocation needs
64 MB of free memory in that region.
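
As a rough illustration of the old order-based rounding (a sketch assuming
4K pages, not code taken from this patch):

	size_t size = 33 << 20;			/* request 33 MB */
	int order = get_order(size);		/* order 14: 16384 pages */
	size_t span = PAGE_SIZE << order;	/* 64 MB actually reserved */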

Thus we can switch to bitmap_find_next_zero_area()/bitmap_set()/
bitmap_clear() to track the coherent memory allocations, reducing the
allocation granularity to a single PAGE_SIZE, as sketched below.
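
A minimal sketch of the page-granular scheme (mem->bitmap and mem->size
come from the patch below; the surrounding logic is illustrative, not the
exact patch code):

	/* Allocate: find a run of nbits free pages and mark it used. */
	int nbits = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	int start = bitmap_find_next_zero_area(mem->bitmap, mem->size,
					       0, nbits, 0);
	if (start >= mem->size)
		return NULL;		/* no contiguous run of nbits pages */
	bitmap_set(mem->bitmap, start, nbits);

	/* Release: clear exactly the nbits pages that were set. */
	bitmap_clear(mem->bitmap, start, nbits);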

Moreover, quoting Arnd's description:
"I believe that bitmap_allocate_region() was chosen here because it is
more efficient than bitmap_find_next_zero_area(), at least that is the
explanation given in https://en.wikipedia.org/wiki/Buddy_memory_allocation.

It's quite possible that we don't actually care about efficiency of
dma_alloc_*() since a) it's supposed to be called very rarely, and
b) the overhead of accessing uncached memory is likely higher than the
search through a relatively small bitmap".

Thus I think we can safely reduce the allocation granularity to one
PAGE_SIZE by converting to the new bitmap APIs, without causing any
efficiency issues.

Signed-off-by: Baolin Wang <baolin.w...@linaro.org>
---
 drivers/base/dma-coherent.c |   54 +++++++++++++++++++++++--------------------
 include/linux/dma-mapping.h |    6 ++---
 2 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index ce19832..4131540 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -143,34 +143,31 @@ void *dma_mark_declared_memory_occupied(struct device *dev,
                                        dma_addr_t device_addr, size_t size)
 {
        struct dma_coherent_mem *mem = dev->dma_mem;
-       int order = get_order(size);
        unsigned long flags;
-       int pos, err;
-
-       size += device_addr & ~PAGE_MASK;
+       int start_bit, nbits;
 
        if (!mem)
                return ERR_PTR(-EINVAL);
 
+       size += device_addr & ~PAGE_MASK;
+       nbits = (size + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
+
        spin_lock_irqsave(&mem->spinlock, flags);
-       pos = PFN_DOWN(device_addr - dma_get_device_base(dev, mem));
-       err = bitmap_allocate_region(mem->bitmap, pos, order);
-       if (!err)
-               mem->avail -= 1 << order;
+       start_bit = PFN_DOWN(device_addr - dma_get_device_base(dev, mem));
+       bitmap_set(mem->bitmap, start_bit, nbits);
+       mem->avail -= nbits;
        spin_unlock_irqrestore(&mem->spinlock, flags);
 
-       if (err != 0)
-               return ERR_PTR(err);
-       return mem->virt_base + (pos << PAGE_SHIFT);
+       return mem->virt_base + (start_bit << PAGE_SHIFT);
 }
 EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
 
 static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
                ssize_t size, dma_addr_t *dma_handle)
 {
-       int order = get_order(size);
+       int nbits = (size + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
        unsigned long flags;
-       int pageno;
+       int start_bit, end_bit;
        void *ret;
 
        spin_lock_irqsave(&mem->spinlock, flags);
@@ -178,16 +175,22 @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
        if (unlikely(size > (mem->avail << PAGE_SHIFT)))
                goto err;
 
-       pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
-       if (unlikely(pageno < 0))
+       start_bit = 0;
+       end_bit = mem->size;
+
+       start_bit = bitmap_find_next_zero_area(mem->bitmap, end_bit, start_bit,
+                                              nbits, 0);
+       if (start_bit >= end_bit)
                goto err;
 
+       bitmap_set(mem->bitmap, start_bit, nbits);
+
        /*
         * Memory was found in the coherent area.
         */
-       *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
-       ret = mem->virt_base + (pageno << PAGE_SHIFT);
-       mem->avail -= 1 << order;
+       *dma_handle = mem->device_base + (start_bit << PAGE_SHIFT);
+       ret = mem->virt_base + (start_bit << PAGE_SHIFT);
+       mem->avail -= nbits;
        spin_unlock_irqrestore(&mem->spinlock, flags);
        memset(ret, 0, size);
        return ret;
@@ -241,16 +244,17 @@ void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
 }
 
 static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
-                                      int order, void *vaddr)
+                                      int size, void *vaddr)
 {
        if (mem && vaddr >= mem->virt_base && vaddr <
                   (mem->virt_base + (mem->size << PAGE_SHIFT))) {
-               int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+               int nbits = (size + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
+               int start_bit = (vaddr - mem->virt_base) >> PAGE_SHIFT;
                unsigned long flags;
 
                spin_lock_irqsave(&mem->spinlock, flags);
-               bitmap_release_region(mem->bitmap, page, order);
-               mem->avail += 1 << order;
+               bitmap_clear(mem->bitmap, start_bit, nbits);
+               mem->avail += nbits;
                spin_unlock_irqrestore(&mem->spinlock, flags);
                return 1;
        }
@@ -260,7 +264,7 @@ static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
 /**
  * dma_release_from_dev_coherent() - free memory to device coherent memory pool
  * @dev:       device from which the memory was allocated
- * @order:     the order of pages allocated
+ * @size:      size of the memory area to release
  * @vaddr:     virtual address of allocated pages
  *
  * This checks whether the memory was allocated from the per-device
@@ -269,11 +273,11 @@ static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
  * Returns 1 if we correctly released the memory, or 0 if the caller should
  * proceed with releasing memory from generic pools.
  */
-int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr)
+int dma_release_from_dev_coherent(struct device *dev, int size, void *vaddr)
 {
        struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
 
-       return __dma_release_from_coherent(mem, order, vaddr);
+       return __dma_release_from_coherent(mem, size, vaddr);
 }
 EXPORT_SYMBOL(dma_release_from_dev_coherent);
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f8ab1c0..29ae92e 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -162,7 +162,7 @@ static inline int is_device_dma_capable(struct device *dev)
  */
 int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
                                       dma_addr_t *dma_handle, void **ret);
-int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr);
+int dma_release_from_dev_coherent(struct device *dev, int size, void *vaddr);
 
 int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
                            void *cpu_addr, size_t size, int *ret);
@@ -174,7 +174,7 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr,
 
 #else
 #define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0)
-#define dma_release_from_dev_coherent(dev, order, vaddr) (0)
+#define dma_release_from_dev_coherent(dev, size, vaddr) (0)
 #define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0)
 
 static inline void *dma_alloc_from_global_coherent(ssize_t size,
@@ -540,7 +540,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size,
        BUG_ON(!ops);
        WARN_ON(irqs_disabled());
 
-       if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr))
+       if (dma_release_from_dev_coherent(dev, size, cpu_addr))
                return;
 
        if (!ops->free || !cpu_addr)
-- 
1.7.9.5
