Re: [PATCH 1/7] drm/ttm: add pool wc/uc page allocator V2

2010-04-01 Thread Jerome Glisse
On Sun, Mar 28, 2010 at 09:16:01PM +0300, Pauli Nieminen wrote:
 On AGP system we might allocate/free routinely uncached or wc memory,
 changing page from cached (wb) to uc or wc is very expensive and involves
 a lot of flushing. To improve performance this allocator use a pool
 of uc,wc pages.
 
 Pools are protected with spinlocks to allow multiple threads to allocate pages
 simultanously. Expensive operations are done outside of spinlock to maximize
 concurrency.
 
 Pools are linked lists of pages that were recently freed. mm shrink callback
 allows kernel to claim back pages when they are required for something else.
 
 Fixes:
 * set_pages_array_wb handles highmem pages so we don't have to remove them
   from pool.
 * Add count parameter to ttm_put_pages to avoid looping in free code.
 * Change looping from _safe to normal in pool fill error path.
 * Initialize sum variable and make the loop prettier in get_num_unused_pages
 
 Based on Jerome Glisse's and Dave Airlie's pool allocator.
 
 Signed-off-by: Jerome Glisse jgli...@redhat.com
 Signed-off-by: Dave Airlie airl...@redhat.com
 Signed-off-by: Pauli Nieminen suok...@gmail.com

I think there is only one issue left see below, once yo got that one
fixed you got my ACK

Cheers,
Jerome

 ---
  drivers/gpu/drm/ttm/Makefile |2 +-
  drivers/gpu/drm/ttm/ttm_memory.c |7 +-
  drivers/gpu/drm/ttm/ttm_page_alloc.c |  705 
 ++
  drivers/gpu/drm/ttm/ttm_tt.c |   44 +--
  include/drm/ttm/ttm_page_alloc.h |   70 
  5 files changed, 803 insertions(+), 25 deletions(-)
  create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
  create mode 100644 include/drm/ttm/ttm_page_alloc.h
 
 diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
 index 1e138f5..4256e20 100644
 --- a/drivers/gpu/drm/ttm/Makefile
 +++ b/drivers/gpu/drm/ttm/Makefile
 @@ -4,6 +4,6 @@
  ccflags-y := -Iinclude/drm
  ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
   ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
 - ttm_object.o ttm_lock.o ttm_execbuf_util.o
 + ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
  
  obj-$(CONFIG_DRM_TTM) += ttm.o
 diff --git a/drivers/gpu/drm/ttm/ttm_memory.c 
 b/drivers/gpu/drm/ttm/ttm_memory.c
 index eb143e0..72f31aa 100644
 --- a/drivers/gpu/drm/ttm/ttm_memory.c
 +++ b/drivers/gpu/drm/ttm/ttm_memory.c
 @@ -27,6 +27,7 @@
  
  #include ttm/ttm_memory.h
  #include ttm/ttm_module.h
 +#include ttm/ttm_page_alloc.h
  #include linux/spinlock.h
  #include linux/sched.h
  #include linux/wait.h
 @@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
  Zone %7s: Available graphics memory: %llu kiB.\n,
  zone-name, (unsigned long long) zone-max_mem  10);
   }
 + ttm_page_alloc_init(glob-zone_kernel-max_mem/(2*PAGE_SIZE));
   return 0;
  out_no_zone:
   ttm_mem_global_release(glob);
 @@ -406,6 +408,9 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
   unsigned int i;
   struct ttm_mem_zone *zone;
  
 + /* let the page allocator first stop the shrink work. */
 + ttm_page_alloc_fini();
 +
   flush_workqueue(glob-swap_queue);
   destroy_workqueue(glob-swap_queue);
   glob-swap_queue = NULL;
 @@ -413,7 +418,7 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
   zone = glob-zones[i];
   kobject_del(zone-kobj);
   kobject_put(zone-kobj);
 - }
 + }
   kobject_del(glob-kobj);
   kobject_put(glob-kobj);
  }
 diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
 b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 new file mode 100644
 index 000..5029fd0
 --- /dev/null
 +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
 @@ -0,0 +1,705 @@
 +/*
 + * Copyright (c) Red Hat Inc.
 +
 + * Permission is hereby granted, free of charge, to any person obtaining a
 + * copy of this software and associated documentation files (the Software),
 + * to deal in the Software without restriction, including without limitation
 + * the rights to use, copy, modify, merge, publish, distribute, sub license,
 + * and/or sell copies of the Software, and to permit persons to whom the
 + * Software is furnished to do so, subject to the following conditions:
 + *
 + * The above copyright notice and this permission notice (including the
 + * next paragraph) shall be included in all copies or substantial portions
 + * of the Software.
 + *
 + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 + * DEALINGS IN THE SOFTWARE.
 

Re: [PATCH 1/7] drm/ttm: add pool wc/uc page allocator V2

2010-04-01 Thread Pauli Nieminen
On Thu, Apr 1, 2010 at 12:52 PM, Jerome Glisse gli...@freedesktop.org wrote:
 On Sun, Mar 28, 2010 at 09:16:01PM +0300, Pauli Nieminen wrote:
 On AGP system we might allocate/free routinely uncached or wc memory,
 changing page from cached (wb) to uc or wc is very expensive and involves
 a lot of flushing. To improve performance this allocator use a pool
 of uc,wc pages.

 Pools are protected with spinlocks to allow multiple threads to allocate 
 pages
 simultanously. Expensive operations are done outside of spinlock to maximize
 concurrency.

 Pools are linked lists of pages that were recently freed. mm shrink callback
 allows kernel to claim back pages when they are required for something else.

 Fixes:
 * set_pages_array_wb handles highmem pages so we don't have to remove them
   from pool.
 * Add count parameter to ttm_put_pages to avoid looping in free code.
 * Change looping from _safe to normal in pool fill error path.
 * Initialize sum variable and make the loop prettier in get_num_unused_pages

 Based on Jerome Glisse's and Dave Airlie's pool allocator.

 Signed-off-by: Jerome Glisse jgli...@redhat.com
 Signed-off-by: Dave Airlie airl...@redhat.com
 Signed-off-by: Pauli Nieminen suok...@gmail.com

 I think there is only one issue left see below, once yo got that one
 fixed you got my ACK

 Cheers,
 Jerome

 +
 +/**
 + * Free pages from pool.
 + *
 + * To prevent hogging the ttm_swap process we only free NUM_PAGES_TO_ALLOC
 + * number of pages in one go.
 + *
 + * @pool: to free the pages from
 + * @free_all: If set to true will free all pages in pool
 + **/
 +static int ttm_page_pool_free(struct ttm_page_pool *pool, unsigned nr_free)
 +{
 +     unsigned long irq_flags;
 +     struct page *p;
 +     struct page **pages_to_free;
 +     unsigned freed_pages, npages_to_free = nr_free;
 +     if (NUM_PAGES_TO_ALLOC  nr_free)
 +             npages_to_free = NUM_PAGES_TO_ALLOC;
 +
 +     pages_to_free = kmalloc(npages_to_free * sizeof(struct page *),
 +                     GFP_KERNEL);
 +     if (!pages_to_free) {
 +             printk(KERN_ERR Failed to allocate memory for pool free 
 operation.\n);
 +             return 0;
 +     }
 +
 +restart:
 +     spin_lock_irqsave(pool-lock, irq_flags);
 +
 +     freed_pages = 0;
 +
 +     list_for_each_entry_reverse(p, pool-list, lru) {
 +             if (freed_pages = npages_to_free)
 +                     break;
 +
 +             pages_to_free[freed_pages++] = p;
 +             /* We can only remove NUM_PAGES_TO_ALLOC at a time. */
 +             if (freed_pages = NUM_PAGES_TO_ALLOC) {
 +                     /* remove range of page sfrom the pool */
 +                     __list_del(p-lru.prev, pool-list);
 +
 +                     ttm_pool_update_free_locked(pool, freed_pages);
 +                     /**
 +                      * Because changing page caching is costly
 +                      * we unlock the pool to prevent stalling.
 +                      */
 +                     spin_unlock_irqrestore(pool-lock, irq_flags);
 +
 +                     ttm_pages_put(pages_to_free, freed_pages);
 +                     if (likely(nr_free != FREE_ALL_PAGES))
 +                             nr_free -= freed_pages;
 +
 +                     if (NUM_PAGES_TO_ALLOC = nr_free)
 +                             npages_to_free = nr_free;
 +                     else
 +                             npages_to_free = NUM_PAGES_TO_ALLOC;
 +
 +                     /* free all so restart the processing */
 +                     if (nr_free)
 +                             goto restart;
 +

 (1) (see below)

 +                     goto out;
 +
 +             }
 +     }
 +
 +
 +     /* remove range of pages from the pool */
 +     if (freed_pages) {
 +             __list_del(p-lru, pool-list);
 +
 +             ttm_pool_update_free_locked(pool, freed_pages);
 +             nr_free -= freed_pages;
 +     }
 +

 if we reach this if through (1) it will fails big time as p has
 already been removed from the list (list_del not list_del_init)
 I think all you need is set freed_page to 0 at (1).


This is impossible code path without freed_pages being at correct
value. Because freed_pages is set to zero in restart code path.

But I guess it would be simpler code if setting zero is done inside
loop before restarting the loop.


 +     spin_unlock_irqrestore(pool-lock, irq_flags);
 +
 +     if (freed_pages)
 +             ttm_pages_put(pages_to_free, freed_pages);
 +out:

We jump from (1) to here if there is no more work to do.

Reason is that in (1) we are outside of spin_lock so we can't jump
into spin_lock from there.

 +     kfree(pages_to_free);
 +     return nr_free;
 +}
 +

--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.

[PATCH 1/7] drm/ttm: add pool wc/uc page allocator V2

2010-03-28 Thread Pauli Nieminen
On AGP system we might allocate/free routinely uncached or wc memory,
changing page from cached (wb) to uc or wc is very expensive and involves
a lot of flushing. To improve performance this allocator use a pool
of uc,wc pages.

Pools are protected with spinlocks to allow multiple threads to allocate pages
simultanously. Expensive operations are done outside of spinlock to maximize
concurrency.

Pools are linked lists of pages that were recently freed. mm shrink callback
allows kernel to claim back pages when they are required for something else.

Fixes:
* set_pages_array_wb handles highmem pages so we don't have to remove them
  from pool.
* Add count parameter to ttm_put_pages to avoid looping in free code.
* Change looping from _safe to normal in pool fill error path.
* Initialize sum variable and make the loop prettier in get_num_unused_pages

Based on Jerome Glisse's and Dave Airlie's pool allocator.

Signed-off-by: Jerome Glisse jgli...@redhat.com
Signed-off-by: Dave Airlie airl...@redhat.com
Signed-off-by: Pauli Nieminen suok...@gmail.com
---
 drivers/gpu/drm/ttm/Makefile |2 +-
 drivers/gpu/drm/ttm/ttm_memory.c |7 +-
 drivers/gpu/drm/ttm/ttm_page_alloc.c |  705 ++
 drivers/gpu/drm/ttm/ttm_tt.c |   44 +--
 include/drm/ttm/ttm_page_alloc.h |   70 
 5 files changed, 803 insertions(+), 25 deletions(-)
 create mode 100644 drivers/gpu/drm/ttm/ttm_page_alloc.c
 create mode 100644 include/drm/ttm/ttm_page_alloc.h

diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile
index 1e138f5..4256e20 100644
--- a/drivers/gpu/drm/ttm/Makefile
+++ b/drivers/gpu/drm/ttm/Makefile
@@ -4,6 +4,6 @@
 ccflags-y := -Iinclude/drm
 ttm-y := ttm_agp_backend.o ttm_memory.o ttm_tt.o ttm_bo.o \
ttm_bo_util.o ttm_bo_vm.o ttm_module.o ttm_global.o \
-   ttm_object.o ttm_lock.o ttm_execbuf_util.o
+   ttm_object.o ttm_lock.o ttm_execbuf_util.o ttm_page_alloc.o
 
 obj-$(CONFIG_DRM_TTM) += ttm.o
diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c
index eb143e0..72f31aa 100644
--- a/drivers/gpu/drm/ttm/ttm_memory.c
+++ b/drivers/gpu/drm/ttm/ttm_memory.c
@@ -27,6 +27,7 @@
 
 #include ttm/ttm_memory.h
 #include ttm/ttm_module.h
+#include ttm/ttm_page_alloc.h
 #include linux/spinlock.h
 #include linux/sched.h
 #include linux/wait.h
@@ -394,6 +395,7 @@ int ttm_mem_global_init(struct ttm_mem_global *glob)
   Zone %7s: Available graphics memory: %llu kiB.\n,
   zone-name, (unsigned long long) zone-max_mem  10);
}
+   ttm_page_alloc_init(glob-zone_kernel-max_mem/(2*PAGE_SIZE));
return 0;
 out_no_zone:
ttm_mem_global_release(glob);
@@ -406,6 +408,9 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
unsigned int i;
struct ttm_mem_zone *zone;
 
+   /* let the page allocator first stop the shrink work. */
+   ttm_page_alloc_fini();
+
flush_workqueue(glob-swap_queue);
destroy_workqueue(glob-swap_queue);
glob-swap_queue = NULL;
@@ -413,7 +418,7 @@ void ttm_mem_global_release(struct ttm_mem_global *glob)
zone = glob-zones[i];
kobject_del(zone-kobj);
kobject_put(zone-kobj);
-   }
+   }
kobject_del(glob-kobj);
kobject_put(glob-kobj);
 }
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c 
b/drivers/gpu/drm/ttm/ttm_page_alloc.c
new file mode 100644
index 000..5029fd0
--- /dev/null
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -0,0 +1,705 @@
+/*
+ * Copyright (c) Red Hat Inc.
+
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sub license,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie airl...@redhat.com
+ *  Jerome Glisse jgli...@redhat.com
+ *  Pauli Nieminen suok...@gmail.com
+ */
+
+/* simple list based uncached page pool
+ * - Pool collects resently freed pages for reuse
+ *