[Intel-gfx] [RFC] drm/i915/sw_fence: Allocate callbacks from dedicated slab caches

2016-11-02 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Not sure if it matters for performance at all, but it should
save some wastage and enable better insight into the usage.

Signed-off-by: Tvrtko Ursulin 
Cc: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c  | 10 +-
 drivers/gpu/drm/i915/i915_sw_fence.c | 67 +---
 drivers/gpu/drm/i915/i915_sw_fence.h |  3 ++
 3 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5839bebba64a..57e550449992 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4748,6 +4748,10 @@ i915_gem_load_init(struct drm_device *dev)
goto err_vmas;
}
 
+   err = i915_init_sw_fences();
+   if (err)
+   goto err_requests;
+
mutex_lock(&dev_priv->drm.struct_mutex);
INIT_LIST_HEAD(&dev_priv->gt.timelines);
err = i915_gem_timeline_init(dev_priv,
@@ -4755,7 +4759,7 @@ i915_gem_load_init(struct drm_device *dev)
 "[execution]");
mutex_unlock(&dev_priv->drm.struct_mutex);
if (err)
-   goto err_requests;
+   goto err_sw_fences;
 
INIT_LIST_HEAD(&dev_priv->context_list);
INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
@@ -4783,6 +4787,8 @@ i915_gem_load_init(struct drm_device *dev)
 
return 0;
 
+err_sw_fences:
+   i915_fini_sw_fences();
 err_requests:
kmem_cache_destroy(dev_priv->requests);
 err_vmas:
@@ -4799,6 +4805,8 @@ void i915_gem_load_cleanup(struct drm_device *dev)
 
WARN_ON(!llist_empty(&dev_priv->mm.free_list));
 
+   i915_fini_sw_fences();
+
kmem_cache_destroy(dev_priv->requests);
kmem_cache_destroy(dev_priv->vmas);
kmem_cache_destroy(dev_priv->objects);
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 95f2f12e0917..5f814b60e2c0 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -17,6 +17,11 @@
 
 static DEFINE_SPINLOCK(i915_sw_fence_lock);
 
+static DEFINE_MUTEX(i915_sw_fence_mutex);
+static unsigned int i915_sw_fence_usecnt;
+static struct kmem_cache *i915_sw_fence_wq_cache;
+static struct kmem_cache *i915_sw_fence_cb_cache;
+
 static int __i915_sw_fence_notify(struct i915_sw_fence *fence,
  enum i915_sw_fence_notify state)
 {
@@ -138,7 +143,7 @@ static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *
__i915_sw_fence_complete(wq->private, key);
i915_sw_fence_put(wq->private);
if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
-   kfree(wq);
+   kmem_cache_free(i915_sw_fence_wq_cache, wq);
return 0;
 }
 
@@ -212,7 +217,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 
pending = 0;
if (!wq) {
-   wq = kmalloc(sizeof(*wq), gfp);
+   wq = kmem_cache_alloc(i915_sw_fence_wq_cache, gfp);
if (!wq) {
if (!gfpflags_allow_blocking(gfp))
return -ENOMEM;
@@ -290,7 +295,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma,
i915_sw_fence_commit(cb->fence);
dma_fence_put(cb->dma);
 
-   kfree(cb);
+   kmem_cache_free(i915_sw_fence_cb_cache, cb);
 }
 
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
@@ -304,7 +309,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
if (dma_fence_is_signaled(dma))
return 0;
 
-   cb = kmalloc(sizeof(*cb), gfp);
+   cb = kmem_cache_alloc(i915_sw_fence_cb_cache, gfp);
if (!cb) {
if (!gfpflags_allow_blocking(gfp))
return -ENOMEM;
@@ -393,3 +398,57 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 
return ret;
 }
+
+int i915_init_sw_fences(void)
+{
+   struct kmem_cache *wq_cache, *cb_cache;
+
+   mutex_lock(&i915_sw_fence_mutex);
+
+   if (i915_sw_fence_usecnt == 0) {
+   wq_cache = kmem_cache_create("i915_sw_fence_wq",
+sizeof(wait_queue_t),
+__alignof__(wait_queue_head_t),
+0, NULL);
+   if (!wq_cache)
+   goto err;
+
+   cb_cache = kmem_cache_create("i915_sw_fence_cb",
+sizeof(struct i915_sw_dma_fence_cb),
+__alignof__(struct i915_sw_dma_fence_cb),
+0, NULL);
+   if (!cb_cache) {
+   kmem_cache_destroy(wq_cache);
+   goto err;
+   }
+
+   i915_sw_fence_wq_cache = wq_cache;
+   i915_sw_fence_cb_cache = cb_cache;
+   }
+
+   i915_sw_fence_usecnt++;
+
+   mutex_unlock(&i915_sw_fence_mutex);
+
+   return 0;
+
+err:
+   mutex_unlock(&i915_sw_fence_mutex);
+   return -ENOMEM;
+}
+
+void i915_fini_sw_fences(void)
+{
+   mutex_lock(&i915_sw_fence_mutex);
+
+   if (--i915_sw_fence_usecnt == 0) {
+   kmem_cache_destroy(i915_sw_fence_wq_cache);
+   kmem_cache_destroy(i915_sw_fence_cb_cache);
+   }
+
+   mutex_unlock(&i915_sw_fence_mutex);
+}
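
For reference, a minimal, self-contained sketch of the kmem_cache
lifecycle the patch builds on; the struct and cache name here are
illustrative and not taken from the patch:

#include <linux/errno.h>
#include <linux/list.h>
#include <linux/slab.h>

/* Illustrative payload; any fixed-size object works. */
struct example_cb {
	struct list_head link;
	void (*func)(struct example_cb *cb);
};

static struct kmem_cache *example_cache;

static int example_init(void)
{
	/*
	 * A dedicated slab avoids the rounding-up waste of kmalloc's
	 * power-of-two buckets and gets its own line in /proc/slabinfo,
	 * which is the "insight into the usage" the commit message
	 * mentions.
	 */
	example_cache = kmem_cache_create("example_cb",
					  sizeof(struct example_cb),
					  __alignof__(struct example_cb),
					  0, NULL);
	return example_cache ? 0 : -ENOMEM;
}

static void example_use(gfp_t gfp)
{
	struct example_cb *cb;

	cb = kmem_cache_alloc(example_cache, gfp);
	if (!cb)
		return;
	/* ... use cb, then return it to its cache ... */
	kmem_cache_free(example_cache, cb);
}

static void example_fini(void)
{
	kmem_cache_destroy(example_cache);
}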

Re: [Intel-gfx] [RFC] drm/i915/sw_fence: Allocate callbacks from dedicated slab caches

2016-11-02 Thread Chris Wilson
On Wed, Nov 02, 2016 at 05:44:07PM +, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> Not sure if it matters for performance at all, but it should
> save some wastage and enable better insight into the usage.

With the caveat that we avoid plugging this into the i915.ko...

This should wait until after kernel/fence.c, as the split will make this
a little more interesting.

> +int i915_init_sw_fences(void)
> +{
> + struct kmem_cache *wq_cache, *cb_cache;
> +
> + mutex_lock(&i915_sw_fence_mutex);

If switched over to an init ctor, the mutex is then redundant.
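
A minimal sketch of that route, assuming hypothetical
i915_sw_fence_module_init/_exit entry points called exactly once from
the driver's module load and unload paths; with the caches created at
init, the mutex and use count above disappear:

static struct kmem_cache *i915_sw_fence_wq_cache;
static struct kmem_cache *i915_sw_fence_cb_cache;

/* Hypothetical names: run once at module init/exit, so no locking
 * or reference counting is needed. */
int __init i915_sw_fence_module_init(void)
{
	i915_sw_fence_wq_cache = kmem_cache_create("i915_sw_fence_wq",
						   sizeof(wait_queue_t),
						   __alignof__(wait_queue_t),
						   0, NULL);
	if (!i915_sw_fence_wq_cache)
		return -ENOMEM;

	i915_sw_fence_cb_cache = kmem_cache_create("i915_sw_fence_cb",
						   sizeof(struct i915_sw_dma_fence_cb),
						   __alignof__(struct i915_sw_dma_fence_cb),
						   0, NULL);
	if (!i915_sw_fence_cb_cache) {
		kmem_cache_destroy(i915_sw_fence_wq_cache);
		return -ENOMEM;
	}

	return 0;
}

void i915_sw_fence_module_exit(void)
{
	kmem_cache_destroy(i915_sw_fence_cb_cache);
	kmem_cache_destroy(i915_sw_fence_wq_cache);
}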

> + if (i915_sw_fence_usecnt == 0) {
> + wq_cache = kmem_cache_create("i915_sw_fence_wq",
> +  sizeof(wait_queue_t),
> +  __alignof__(wait_queue_head_t),
> +  0, NULL);
> + if (!wq_cache)
> + goto err;
> +
> + cb_cache = kmem_cache_create("i915_sw_fence_cb",
> +  sizeof(struct i915_sw_dma_fence_cb),
> +  __alignof__(struct i915_sw_dma_fence_cb),
> +  0, NULL);

For example, this cache will need to end up in
drivers/dma-buf/dma-fence.c
-Chris
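
A purely hypothetical sketch of where that could head; none of these
helpers or names exist in dma-fence.c, they are invented here for
illustration only:

/* Hypothetical: a shared cache in the dma-fence core for callback
 * nodes, so individual drivers stop rolling their own. */
struct dma_fence_cb_node {
	struct dma_fence_cb base;
	struct dma_fence *fence;
};

static struct kmem_cache *dma_fence_cb_cache;

struct dma_fence_cb_node *dma_fence_cb_node_alloc(gfp_t gfp)
{
	return kmem_cache_alloc(dma_fence_cb_cache, gfp);
}

void dma_fence_cb_node_free(struct dma_fence_cb_node *node)
{
	kmem_cache_free(dma_fence_cb_cache, node);
}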

-- 
Chris Wilson, Intel Open Source Technology Centre