dma_pool_free() scales poorly when the pool contains many pages because
pool_find_page() does a linear scan of all allocated pages.  Improve its
scalability by replacing the linear scan with a red-black tree lookup. 
In big O notation, this improves the algorithm from O(n^2) to O(n * log n).

Signed-off-by: Tony Battersby <to...@cybernetics.com>
---

I moved some code from dma_pool_destroy() into pool_free_page() to avoid code
repetition.

--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -15,11 +15,12 @@
  * Many older drivers still have their own code to do this.
  *
  * The current design of this allocator is fairly simple.  The pool is
- * represented by the 'struct dma_pool' which keeps a doubly-linked list of
- * allocated pages.  Each page in the page_list is split into blocks of at
- * least 'size' bytes.  Free blocks are tracked in an unsorted singly-linked
- * list of free blocks within the page.  Used blocks aren't tracked, but we
- * keep a count of how many are currently allocated from each page.
+ * represented by the 'struct dma_pool' which keeps a red-black tree of all
+ * allocated pages, keyed by DMA address for fast lookup when freeing.
+ * Each page in the page_tree is split into blocks of at least 'size' bytes.
+ * Free blocks are tracked in an unsorted singly-linked list of free blocks
+ * within the page.  Used blocks aren't tracked, but we keep a count of how
+ * many are currently allocated from each page.
  *
  * The avail_page_list keeps track of pages that have one or more free blocks
  * available to (re)allocate.  Pages are moved in and out of avail_page_list
@@ -41,13 +42,14 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/wait.h>
+#include <linux/rbtree.h>
 
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
 #define DMAPOOL_DEBUG 1
 #endif
 
 struct dma_pool {              /* the pool */
-       struct list_head page_list;
+       struct rb_root page_tree;
        struct list_head avail_page_list;
        spinlock_t lock;
        size_t size;
@@ -59,7 +61,7 @@ struct dma_pool {             /* the pool */
 };
 
 struct dma_page {              /* cacheable header for 'allocation' bytes */
-       struct list_head page_list;
+       struct rb_node page_node;
        struct list_head avail_page_link;
        void *vaddr;
        dma_addr_t dma;
@@ -78,6 +80,7 @@ show_pools(struct device *dev, struct de
        char *next;
        struct dma_page *page;
        struct dma_pool *pool;
+       struct rb_node *node;
 
        next = buf;
        size = PAGE_SIZE;
@@ -92,7 +95,10 @@ show_pools(struct device *dev, struct de
                unsigned blocks = 0;
 
                spin_lock_irq(&pool->lock);
-               list_for_each_entry(page, &pool->page_list, page_list) {
+               for (node = rb_first(&pool->page_tree);
+                    node;
+                    node = rb_next(node)) {
+                       page = rb_entry(node, struct dma_page, page_node);
                        pages++;
                        blocks += page->in_use;
                }
@@ -169,7 +175,7 @@ struct dma_pool *dma_pool_create(const c
 
        retval->dev = dev;
 
-       INIT_LIST_HEAD(&retval->page_list);
+       retval->page_tree = RB_ROOT;
        INIT_LIST_HEAD(&retval->avail_page_list);
        spin_lock_init(&retval->lock);
        retval->size = size;
@@ -210,6 +216,65 @@ struct dma_pool *dma_pool_create(const c
 }
 EXPORT_SYMBOL(dma_pool_create);
 
+/*
+ * Find the dma_page that manages the given DMA address.
+ */
+static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
+{
+       struct rb_node *node = pool->page_tree.rb_node;
+
+       while (node) {
+               struct dma_page *page =
+                       container_of(node, struct dma_page, page_node);
+
+               if (dma < page->dma)
+                       node = node->rb_left;
+               else if ((dma - page->dma) >= pool->allocation)
+                       node = node->rb_right;
+               else
+                       return page;
+       }
+       return NULL;
+}
+
+/*
+ * Insert a dma_page into the page_tree.
+ */
+static int pool_insert_page(struct dma_pool *pool, struct dma_page *new_page)
+{
+       dma_addr_t dma = new_page->dma;
+       struct rb_node **node = &(pool->page_tree.rb_node), *parent = NULL;
+
+       while (*node) {
+               struct dma_page *this_page =
+                       container_of(*node, struct dma_page, page_node);
+
+               parent = *node;
+               if (dma < this_page->dma)
+                       node = &((*node)->rb_left);
+               else if (likely((dma - this_page->dma) >= pool->allocation))
+                       node = &((*node)->rb_right);
+               else {
+                       /*
+                        * A page that overlaps the new DMA range is already
+                        * present in the tree.  This should not happen.
+                        */
+                       WARN(1,
+                            "%s: %s: DMA address overlap: old 0x%llx new 
0x%llx len %zu\n",
+                            pool->dev ? dev_name(pool->dev) : "(nodev)",
+                            pool->name, (u64) this_page->dma, (u64) dma,
+                            pool->allocation);
+                       return -1;
+               }
+       }
+
+       /* Add new node and rebalance tree. */
+       rb_link_node(&new_page->page_node, parent, node);
+       rb_insert_color(&new_page->page_node, &pool->page_tree);
+
+       return 0;
+}
+
 static void pool_initialise_page(struct dma_pool *pool, struct dma_page *page)
 {
        unsigned int offset = 0;
@@ -254,15 +319,36 @@ static inline bool is_page_busy(struct d
        return page->in_use != 0;
 }
 
-static void pool_free_page(struct dma_pool *pool, struct dma_page *page)
+static void pool_free_page(struct dma_pool *pool,
+                          struct dma_page *page,
+                          bool destroying_pool)
 {
-       dma_addr_t dma = page->dma;
-
+       if (destroying_pool && is_page_busy(page)) {
+               if (pool->dev)
+                       dev_err(pool->dev,
+                               "dma_pool_destroy %s, %p busy\n",
+                               pool->name, page->vaddr);
+               else
+                       pr_err("dma_pool_destroy %s, %p busy\n",
+                              pool->name, page->vaddr);
+               /* leak the still-in-use consistent memory */
+       } else {
 #ifdef DMAPOOL_DEBUG
-       memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
+               memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
 #endif
-       dma_free_coherent(pool->dev, pool->allocation, page->vaddr, dma);
-       list_del(&page->page_list);
+               dma_free_coherent(pool->dev,
+                                 pool->allocation,
+                                 page->vaddr,
+                                 page->dma);
+       }
+
+       /*
+        * If the pool is being destroyed, it is not safe to modify the
+        * page_tree while iterating over it, and it is also unnecessary since
+        * the whole tree will be discarded anyway.
+        */
+       if (!destroying_pool)
+               rb_erase(&page->page_node, &pool->page_tree);
        list_del(&page->avail_page_link);
        kfree(page);
 }
@@ -277,6 +363,7 @@ static void pool_free_page(struct dma_po
  */
 void dma_pool_destroy(struct dma_pool *pool)
 {
+       struct dma_page *page, *tmp_page;
        bool empty = false;
 
        if (unlikely(!pool))
@@ -292,24 +379,11 @@ void dma_pool_destroy(struct dma_pool *p
                device_remove_file(pool->dev, &dev_attr_pools);
        mutex_unlock(&pools_reg_lock);
 
-       while (!list_empty(&pool->page_list)) {
-               struct dma_page *page;
-               page = list_entry(pool->page_list.next,
-                                 struct dma_page, page_list);
-               if (is_page_busy(page)) {
-                       if (pool->dev)
-                               dev_err(pool->dev,
-                                       "dma_pool_destroy %s, %p busy\n",
-                                       pool->name, page->vaddr);
-                       else
-                               pr_err("dma_pool_destroy %s, %p busy\n",
-                                      pool->name, page->vaddr);
-                       /* leak the still-in-use consistent memory */
-                       list_del(&page->page_list);
-                       list_del(&page->avail_page_link);
-                       kfree(page);
-               } else
-                       pool_free_page(pool, page);
+       rbtree_postorder_for_each_entry_safe(page,
+                                            tmp_page,
+                                            &pool->page_tree,
+                                            page_node) {
+               pool_free_page(pool, page, true);
        }
 
        kfree(pool);
@@ -353,7 +427,15 @@ void *dma_pool_alloc(struct dma_pool *po
 
        spin_lock_irqsave(&pool->lock, flags);
 
-       list_add(&page->page_list, &pool->page_list);
+       if (unlikely(pool_insert_page(pool, page))) {
+               /*
+                * This should not happen, so something must have gone horribly
+                * wrong.  Instead of crashing, intentionally leak the memory
+                * and make for the exit.
+                */
+               spin_unlock_irqrestore(&pool->lock, flags);
+               return NULL;
+       }
        list_add(&page->avail_page_link, &pool->avail_page_list);
  ready:
        page->in_use++;
@@ -400,19 +482,6 @@ void *dma_pool_alloc(struct dma_pool *po
 }
 EXPORT_SYMBOL(dma_pool_alloc);
 
-static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
-{
-       struct dma_page *page;
-
-       list_for_each_entry(page, &pool->page_list, page_list) {
-               if (dma < page->dma)
-                       continue;
-               if ((dma - page->dma) < pool->allocation)
-                       return page;
-       }
-       return NULL;
-}
-
 /**
  * dma_pool_free - put block back into dma pool
  * @pool: the dma pool holding the block
@@ -484,7 +553,7 @@ void dma_pool_free(struct dma_pool *pool
        page->offset = offset;
        /*
         * Resist a temptation to do
-        *    if (!is_page_busy(page)) pool_free_page(pool, page);
+        *    if (!is_page_busy(page)) pool_free_page(pool, page, false);
         * Better have a few empty pages hang around.
         */
        spin_unlock_irqrestore(&pool->lock, flags);

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to