Re: [PATCH v4 4/9] dmapool: improve scalability of dma_pool_alloc

2018-11-12 Thread Matthew Wilcox
On Mon, Nov 12, 2018 at 10:43:25AM -0500, Tony Battersby wrote:
> dma_pool_alloc() scales poorly when allocating a large number of pages
> because it does a linear scan of all previously-allocated pages before
> allocating a new one.  Improve its scalability by maintaining a separate
> list of pages that have free blocks ready to (re)allocate.  In big O
> notation, this improves the total cost of n allocations from O(n^2) to O(n).
> 
> Signed-off-by: Tony Battersby 

Acked-by: Matthew Wilcox 

[PATCH v4 4/9] dmapool: improve scalability of dma_pool_alloc

2018-11-12 Thread Tony Battersby
dma_pool_alloc() scales poorly when allocating a large number of pages
because it does a linear scan of all previously-allocated pages before
allocating a new one.  Improve its scalability by maintaining a separate
list of pages that have free blocks ready to (re)allocate.  In big O
notation, this improves the total cost of n allocations from O(n^2) to O(n).

Signed-off-by: Tony Battersby 
---
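A note for readers skimming the diff: the userspace toy below is an
illustration only, not kernel code.  The open-coded list helpers and the
per-page free-block counter are simplified stand-ins for the kernel's
struct list_head and the in-page free list.  It shows the core idea:
pages migrate between an 'available' and a 'full' list, so an allocation
finds a page with a free block in O(1) instead of scanning every page.

#include <stdio.h>
#include <stdlib.h>

#define BLOCKS_PER_PAGE 4

enum { POOL_FULL_IDX, POOL_AVAIL_IDX, POOL_MAX_IDX };

struct page {
	struct page *next, *prev;	/* stand-in for struct list_head */
	int free_blocks;
};

struct pool {
	struct page lists[POOL_MAX_IDX];	/* circular list heads */
};

static void list_init(struct page *h)
{
	h->next = h->prev = h;
}

static void list_add(struct page *p, struct page *h)
{
	p->next = h->next;
	p->prev = h;
	h->next->prev = p;
	h->next = p;
}

static void list_del(struct page *p)
{
	p->prev->next = p->next;
	p->next->prev = p->prev;
}

/* O(1): any page on the avail list is guaranteed to have a free block,
 * so there is no longer a linear scan over full pages. */
static struct page *pool_alloc_block(struct pool *pool)
{
	struct page *head = &pool->lists[POOL_AVAIL_IDX];
	struct page *page = head->next;

	if (page == head) {			/* avail list empty: grow */
		page = calloc(1, sizeof(*page));	/* error handling omitted */
		page->free_blocks = BLOCKS_PER_PAGE;
		list_add(page, head);
	}
	if (--page->free_blocks == 0) {		/* last block: page is full */
		list_del(page);
		list_add(page, &pool->lists[POOL_FULL_IDX]);
	}
	return page;
}

/* Freeing a block on a full page makes it available again. */
static void pool_free_block(struct pool *pool, struct page *page)
{
	if (page->free_blocks++ == 0) {
		list_del(page);
		list_add(page, &pool->lists[POOL_AVAIL_IDX]);
	}
}

int main(void)
{
	struct pool pool;
	struct page *p = NULL;
	int i;

	list_init(&pool.lists[POOL_FULL_IDX]);
	list_init(&pool.lists[POOL_AVAIL_IDX]);

	for (i = 0; i < BLOCKS_PER_PAGE; i++)
		p = pool_alloc_block(&pool);	/* last one fills the page */
	printf("free blocks on page: %d\n", p->free_blocks);	/* 0 */
	pool_free_block(&pool, p);
	printf("free blocks on page: %d\n", p->free_blocks);	/* 1 */
	return 0;
}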
--- linux/mm/dmapool.c.orig 2018-08-03 16:16:49.0 -0400
+++ linux/mm/dmapool.c  2018-08-03 16:45:33.0 -0400
@@ -15,11 +15,16 @@
  * Many older drivers still have their own code to do this.
  *
  * The current design of this allocator is fairly simple.  The pool is
- * represented by the 'struct dma_pool' which keeps a doubly-linked list of
- * allocated pages.  Each page in the page_list is split into blocks of at
- * least 'size' bytes.  Free blocks are tracked in an unsorted singly-linked
- * list of free blocks within the page.  Used blocks aren't tracked, but we
- * keep a count of how many are currently allocated from each page.
+ * represented by the 'struct dma_pool'.  Each allocated page is split into
+ * blocks of at least 'size' bytes.  Free blocks are tracked in an unsorted
+ * singly-linked list of free blocks within the page.  Used blocks aren't
+ * tracked, but we keep a count of how many are currently allocated from each
+ * page.
+ *
+ * The pool keeps two doubly-linked lists of allocated pages.  The 'available'
+ * list tracks pages that have one or more free blocks, and the 'full' list
+ * tracks pages that have no free blocks.  Pages are moved from one list to
+ * the other as their blocks are allocated and freed.
  */
 
 #include <linux/device.h>
@@ -43,7 +48,10 @@
 #endif
 
 struct dma_pool {  /* the pool */
-   struct list_head page_list;
+#define POOL_FULL_IDX   0
+#define POOL_AVAIL_IDX  1
+#define POOL_MAX_IDX2
+   struct list_head page_list[POOL_MAX_IDX];
spinlock_t lock;
size_t size;
struct device *dev;
@@ -54,7 +62,7 @@ struct dma_pool { /* the pool */
 };
 
 struct dma_page {  /* cacheable header for 'allocation' bytes */
-   struct list_head page_list;
+   struct list_head dma_list;
void *vaddr;
dma_addr_t dma;
unsigned int in_use;
@@ -70,7 +78,6 @@ show_pools(struct device *dev, struct de
unsigned temp;
unsigned size;
char *next;
-   struct dma_page *page;
struct dma_pool *pool;
 
next = buf;
@@ -84,11 +91,18 @@ show_pools(struct device *dev, struct de
list_for_each_entry(pool, &dev->dma_pools, pools) {
unsigned pages = 0;
unsigned blocks = 0;
+   int list_idx;
 
spin_lock_irq(&pool->lock);
-   list_for_each_entry(page, &pool->page_list, page_list) {
-   pages++;
-   blocks += page->in_use;
+   for (list_idx = 0; list_idx < POOL_MAX_IDX; list_idx++) {
+   struct dma_page *page;
+
+   list_for_each_entry(page,
+   &pool->page_list[list_idx],
+   dma_list) {
+   pages++;
+   blocks += page->in_use;
+   }
}
spin_unlock_irq(&pool->lock);
 
@@ -163,7 +177,8 @@ struct dma_pool *dma_pool_create(const c
 
retval->dev = dev;
 
-   INIT_LIST_HEAD(&retval->page_list);
+   INIT_LIST_HEAD(&retval->page_list[POOL_FULL_IDX]);
+   INIT_LIST_HEAD(&retval->page_list[POOL_AVAIL_IDX]);
spin_lock_init(&retval->lock);
retval->size = size;
retval->boundary = boundary;
@@ -252,7 +267,7 @@ static void pool_free_page(struct dma_po
void *vaddr = page->vaddr;
dma_addr_t dma = page->dma;
 
-   list_del(&page->page_list);
+   list_del(&page->dma_list);
 
if (is_page_busy(page)) {
dev_err(pool->dev,
@@ -278,8 +293,8 @@ static void pool_free_page(struct dma_po
  */
 void dma_pool_destroy(struct dma_pool *pool)
 {
-   struct dma_page *page;
bool empty = false;
+   int list_idx;
 
if (unlikely(!pool))
return;
@@ -294,10 +309,15 @@ void dma_pool_destroy(struct dma_pool *p
device_remove_file(pool->dev, &dev_attr_pools);
mutex_unlock(&pools_reg_lock);
 
-   while ((page = list_first_entry_or_null(&pool->page_list,
-   struct dma_page,
-   page_list))) {
-   pool_free_page(pool, page);
+   for (list_idx = 0; list_idx < POOL_MAX_IDX; list_idx++) {
+   struct dma_page *page;
+
+   while ((page = list_first_entry_or_null(
+   &pool->page_list[list_idx],
+   struct dma_page,
+   dma_list))) {
+   pool_free_page(pool, page);
+   }
    }
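The remaining hunks, not shown in this excerpt, apply the same list
maintenance to the hot paths.  Roughly, on the alloc side — this is a
paraphrased sketch, not the literal patch text, and it assumes the
pre-existing in-page free list whose head is 'offset' in struct dma_page:

	struct dma_page *page;
	unsigned long flags;
	size_t offset;

	spin_lock_irqsave(&pool->lock, flags);
	/* Illustrative only: an O(1) lookup replaces the linear scan. */
	page = list_first_entry_or_null(&pool->page_list[POOL_AVAIL_IDX],
					struct dma_page, dma_list);
	if (page) {
		page->in_use++;
		offset = page->offset;
		page->offset = *(int *)(page->vaddr + offset);
		/* Last free block taken: retire the page to the full list. */
		if (page->offset >= pool->allocation)
			list_move(&page->dma_list,
				  &pool->page_list[POOL_FULL_IDX]);
	}
	spin_unlock_irqrestore(&pool->lock, flags);

dma_pool_free() does the inverse: when a block is returned to a page on
the 'full' list, the page moves back to 'available'.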