[PATCH RFC v4 13/16] drm, cgroup: Allow more aggressive memory reclaim

2019-08-28 Thread Kenny Ho
Allow DRM TTM memory manager to register a work_struct, such that, when
a drmcgrp is under memory pressure, memory reclaiming can be triggered
immediately.

Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
Signed-off-by: Kenny Ho 
---
 drivers/gpu/drm/ttm/ttm_bo.c| 49 +
 include/drm/drm_cgroup.h| 16 +++
 include/drm/ttm/ttm_bo_driver.h |  2 ++
 kernel/cgroup/drm.c | 30 
 4 files changed, 97 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d7e3d3128ebb..72efae694b7e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned 
mem_type)
 }
 EXPORT_SYMBOL(ttm_bo_evict_mm);
 
+static void ttm_bo_reclaim_wq(struct work_struct *work)
+{
+   struct ttm_operation_ctx ctx = {
+   .interruptible = false,
+   .no_wait_gpu = false,
+   .flags = TTM_OPT_FLAG_FORCE_ALLOC
+   };
+   struct ttm_mem_type_manager *man =
+   container_of(work, struct ttm_mem_type_manager, reclaim_wq);
+   struct ttm_bo_device *bdev = man->bdev;
+   struct dma_fence *fence;
+   int mem_type;
+   int ret;
+
+   for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
+   if (&bdev->man[mem_type] == man)
+   break;
+
+   WARN_ON(mem_type >= TTM_NUM_MEM_TYPES);
+   if (mem_type >= TTM_NUM_MEM_TYPES)
+   return;
+
+   if (!drmcg_mem_pressure_scan(bdev, mem_type))
+   return;
+
+   ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx, NULL);
+   if (ret)
+   return;
+
+   spin_lock(&man->move_lock);
+   fence = dma_fence_get(man->move);
+   spin_unlock(&man->move_lock);
+
+   if (fence) {
+   ret = dma_fence_wait(fence, false);
+   dma_fence_put(fence);
+   }
+
+}
+
 int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
unsigned long p_size)
 {
@@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned 
type,
INIT_LIST_HEAD(&man->lru[i]);
man->move = NULL;
 
+   pr_err("drmcg %p type %d\n", bdev->ddev, type);
+
+   if (type <= TTM_PL_VRAM) {
+   INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq);
+   drmcg_register_device_mm(bdev->ddev, type, &man->reclaim_wq);
+   }
+
return 0;
 }
 EXPORT_SYMBOL(ttm_bo_init_mm);
@@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
man = &bdev->man[i];
if (man->has_type) {
man->use_type = false;
+   drmcg_unregister_device_mm(bdev->ddev, i);
+   cancel_work_sync(&man->reclaim_wq);
if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) {
ret = -EBUSY;
pr_err("DRM memory manager type %d is not 
clean\n",
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index c11df388fdf2..6d9707e1eb72 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -5,6 +5,7 @@
 #define __DRM_CGROUP_H__
 
 #include 
+#include 
 #include 
 #include 
 
@@ -25,12 +26,17 @@ struct drmcg_props {
s64 mem_bw_avg_bytes_per_us_default;
 
s64 mem_highs_default[TTM_PL_PRIV+1];
+
+   struct work_struct  *mem_reclaim_wq[TTM_PL_PRIV];
 };
 
 #ifdef CONFIG_CGROUP_DRM
 
 void drmcg_device_update(struct drm_device *device);
 void drmcg_device_early_init(struct drm_device *device);
+void drmcg_register_device_mm(struct drm_device *dev, unsigned int type,
+   struct work_struct *wq);
+void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type);
 bool drmcg_try_chg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev,
size_t size);
 void drmcg_unchg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev,
@@ -53,6 +59,16 @@ static inline void drmcg_device_early_init(struct drm_device 
*device)
 {
 }
 
+static inline void drmcg_register_device_mm(struct drm_device *dev,
+   unsigned int type, struct work_struct *wq)
+{
+}
+
+static inline void drmcg_unregister_device_mm(struct drm_device *dev,
+   unsigned int type)
+{
+}
+
 static inline void drmcg_try_chg_bo_alloc(struct drmcg *drmcg,
struct drm_device *dev, size_t size)
 {
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index e1a805d65b83..529cef92bcf6 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -205,6 +205,8 @@ struct ttm_mem_type_manager {
 * Protected by @move_lock.
 */
struct dma_fence *move;
+
+   struct work_struct reclaim_wq;
 };
 
 /**
diff --git a/kernel/cgroup/drm.c b/kernel/cgr

Re: [PATCH RFC v4 13/16] drm, cgroup: Allow more aggressive memory reclaim

2019-08-29 Thread Koenig, Christian
Am 29.08.19 um 08:05 schrieb Kenny Ho:
> Allow DRM TTM memory manager to register a work_struct, such that, when
> a drmcgrp is under memory pressure, memory reclaiming can be triggered
> immediately.
>
> Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
> Signed-off-by: Kenny Ho 
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c| 49 +
>   include/drm/drm_cgroup.h| 16 +++
>   include/drm/ttm/ttm_bo_driver.h |  2 ++
>   kernel/cgroup/drm.c | 30 
>   4 files changed, 97 insertions(+)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index d7e3d3128ebb..72efae694b7e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, 
> unsigned mem_type)
>   }
>   EXPORT_SYMBOL(ttm_bo_evict_mm);
>   
> +static void ttm_bo_reclaim_wq(struct work_struct *work)
> +{
> + struct ttm_operation_ctx ctx = {
> + .interruptible = false,
> + .no_wait_gpu = false,
> + .flags = TTM_OPT_FLAG_FORCE_ALLOC
> + };
> + struct ttm_mem_type_manager *man =
> + container_of(work, struct ttm_mem_type_manager, reclaim_wq);
> + struct ttm_bo_device *bdev = man->bdev;
> + struct dma_fence *fence;
> + int mem_type;
> + int ret;
> +
> + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
> + if (&bdev->man[mem_type] == man)
> + break;
> +
> + WARN_ON(mem_type >= TTM_NUM_MEM_TYPES);
> + if (mem_type >= TTM_NUM_MEM_TYPES)
> + return;
> +
> + if (!drmcg_mem_pressure_scan(bdev, mem_type))
> + return;
> +
> + ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx, NULL);
> + if (ret)
> + return;
> +
> + spin_lock(&man->move_lock);
> + fence = dma_fence_get(man->move);
> + spin_unlock(&man->move_lock);
> +
> + if (fence) {
> + ret = dma_fence_wait(fence, false);
> + dma_fence_put(fence);
> + }

Why do you want to block for the fence here? That is a rather bad idea 
and would break pipe-lining.

Apart from that I don't think we should put that into TTM.

Instead drmcg_register_device_mm() should get a function pointer which 
is called from a work item when the group is under pressure.

TTM can then provides the function which can be called, but the actually 
registration is job of the device and not TTM.

Regards,
Christian.

> +
> +}
> +
>   int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
>   unsigned long p_size)
>   {
> @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, 
> unsigned type,
>   INIT_LIST_HEAD(&man->lru[i]);
>   man->move = NULL;
>   
> + pr_err("drmcg %p type %d\n", bdev->ddev, type);
> +
> + if (type <= TTM_PL_VRAM) {
> + INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq);
> + drmcg_register_device_mm(bdev->ddev, type, &man->reclaim_wq);
> + }
> +
>   return 0;
>   }
>   EXPORT_SYMBOL(ttm_bo_init_mm);
> @@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
>   man = &bdev->man[i];
>   if (man->has_type) {
>   man->use_type = false;
> + drmcg_unregister_device_mm(bdev->ddev, i);
> + cancel_work_sync(&man->reclaim_wq);
>   if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) {
>   ret = -EBUSY;
>   pr_err("DRM memory manager type %d is not 
> clean\n",
> diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
> index c11df388fdf2..6d9707e1eb72 100644
> --- a/include/drm/drm_cgroup.h
> +++ b/include/drm/drm_cgroup.h
> @@ -5,6 +5,7 @@
>   #define __DRM_CGROUP_H__
>   
>   #include 
> +#include 
>   #include 
>   #include 
>   
> @@ -25,12 +26,17 @@ struct drmcg_props {
>   s64 mem_bw_avg_bytes_per_us_default;
>   
>   s64 mem_highs_default[TTM_PL_PRIV+1];
> +
> + struct work_struct  *mem_reclaim_wq[TTM_PL_PRIV];
>   };
>   
>   #ifdef CONFIG_CGROUP_DRM
>   
>   void drmcg_device_update(struct drm_device *device);
>   void drmcg_device_early_init(struct drm_device *device);
> +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type,
> + struct work_struct *wq);
> +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type);
>   bool drmcg_try_chg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev,
>   size_t size);
>   void drmcg_unchg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev,
> @@ -53,6 +59,16 @@ static inline void drmcg_device_early_init(struct 
> drm_device *device)
>   {
>   }
>   
> +static inline void drmcg_register_device_mm(struct drm_device *dev,
> + unsigned int type, struct work_struct *wq)
> +{
>

Re: [PATCH RFC v4 13/16] drm, cgroup: Allow more aggressive memory reclaim

2019-08-29 Thread Kenny Ho
Thanks for the feedback Christian.  I am still digging into this one.
Daniel suggested leveraging the Shrinker API for the functionality of this
commit in RFC v3 but I am still trying to figure it out how/if ttm fit with
shrinker (though the idea behind the shrinker API seems fairly
straightforward as far as I understand it currently.)

Regards,
Kenny

On Thu, Aug 29, 2019 at 3:08 AM Koenig, Christian 
wrote:

> Am 29.08.19 um 08:05 schrieb Kenny Ho:
> > Allow DRM TTM memory manager to register a work_struct, such that, when
> > a drmcgrp is under memory pressure, memory reclaiming can be triggered
> > immediately.
> >
> > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
> > Signed-off-by: Kenny Ho 
> > ---
> >   drivers/gpu/drm/ttm/ttm_bo.c| 49 +
> >   include/drm/drm_cgroup.h| 16 +++
> >   include/drm/ttm/ttm_bo_driver.h |  2 ++
> >   kernel/cgroup/drm.c | 30 
> >   4 files changed, 97 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index d7e3d3128ebb..72efae694b7e 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev,
> unsigned mem_type)
> >   }
> >   EXPORT_SYMBOL(ttm_bo_evict_mm);
> >
> > +static void ttm_bo_reclaim_wq(struct work_struct *work)
> > +{
> > + struct ttm_operation_ctx ctx = {
> > + .interruptible = false,
> > + .no_wait_gpu = false,
> > + .flags = TTM_OPT_FLAG_FORCE_ALLOC
> > + };
> > + struct ttm_mem_type_manager *man =
> > + container_of(work, struct ttm_mem_type_manager, reclaim_wq);
> > + struct ttm_bo_device *bdev = man->bdev;
> > + struct dma_fence *fence;
> > + int mem_type;
> > + int ret;
> > +
> > + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
> > + if (&bdev->man[mem_type] == man)
> > + break;
> > +
> > + WARN_ON(mem_type >= TTM_NUM_MEM_TYPES);
> > + if (mem_type >= TTM_NUM_MEM_TYPES)
> > + return;
> > +
> > + if (!drmcg_mem_pressure_scan(bdev, mem_type))
> > + return;
> > +
> > + ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx, NULL);
> > + if (ret)
> > + return;
> > +
> > + spin_lock(&man->move_lock);
> > + fence = dma_fence_get(man->move);
> > + spin_unlock(&man->move_lock);
> > +
> > + if (fence) {
> > + ret = dma_fence_wait(fence, false);
> > + dma_fence_put(fence);
> > + }
>
> Why do you want to block for the fence here? That is a rather bad idea
> and would break pipe-lining.
>
> Apart from that I don't think we should put that into TTM.
>
> Instead drmcg_register_device_mm() should get a function pointer which
> is called from a work item when the group is under pressure.
>
> TTM can then provides the function which can be called, but the actually
> registration is job of the device and not TTM.
>
> Regards,
> Christian.
>
> > +
> > +}
> > +
> >   int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
> >   unsigned long p_size)
> >   {
> > @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev,
> unsigned type,
> >   INIT_LIST_HEAD(&man->lru[i]);
> >   man->move = NULL;
> >
> > + pr_err("drmcg %p type %d\n", bdev->ddev, type);
> > +
> > + if (type <= TTM_PL_VRAM) {
> > + INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq);
> > + drmcg_register_device_mm(bdev->ddev, type,
> &man->reclaim_wq);
> > + }
> > +
> >   return 0;
> >   }
> >   EXPORT_SYMBOL(ttm_bo_init_mm);
> > @@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device
> *bdev)
> >   man = &bdev->man[i];
> >   if (man->has_type) {
> >   man->use_type = false;
> > + drmcg_unregister_device_mm(bdev->ddev, i);
> > + cancel_work_sync(&man->reclaim_wq);
> >   if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev,
> i)) {
> >   ret = -EBUSY;
> >   pr_err("DRM memory manager type %d is not
> clean\n",
> > diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
> > index c11df388fdf2..6d9707e1eb72 100644
> > --- a/include/drm/drm_cgroup.h
> > +++ b/include/drm/drm_cgroup.h
> > @@ -5,6 +5,7 @@
> >   #define __DRM_CGROUP_H__
> >
> >   #include 
> > +#include 
> >   #include 
> >   #include 
> >
> > @@ -25,12 +26,17 @@ struct drmcg_props {
> >   s64 mem_bw_avg_bytes_per_us_default;
> >
> >   s64 mem_highs_default[TTM_PL_PRIV+1];
> > +
> > + struct work_struct  *mem_reclaim_wq[TTM_PL_PRIV];
> >   };
> >
> >   #ifdef CONFIG_CGROUP_DRM
> >
> >   void drmcg_device_update(struct drm_device *device);
> >   void drmcg_device_early_init(struct drm_d

Re: [PATCH RFC v4 13/16] drm, cgroup: Allow more aggressive memory reclaim

2019-08-29 Thread Koenig, Christian
Yeah, that's also a really good idea as well.

The problem with the shrinker API is that it only applies to system memory 
currently.

So you won't have a distinction which domain you need to evict stuff from.

Regards,
Christian.

Am 29.08.19 um 16:07 schrieb Kenny Ho:
Thanks for the feedback Christian.  I am still digging into this one.  Daniel 
suggested leveraging the Shrinker API for the functionality of this commit in 
RFC v3 but I am still trying to figure it out how/if ttm fit with shrinker 
(though the idea behind the shrinker API seems fairly straightforward as far as 
I understand it currently.)

Regards,
Kenny

On Thu, Aug 29, 2019 at 3:08 AM Koenig, Christian 
mailto:christian.koe...@amd.com>> wrote:
Am 29.08.19 um 08:05 schrieb Kenny Ho:
> Allow DRM TTM memory manager to register a work_struct, such that, when
> a drmcgrp is under memory pressure, memory reclaiming can be triggered
> immediately.
>
> Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
> Signed-off-by: Kenny Ho mailto:kenny...@amd.com>>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c| 49 +
>   include/drm/drm_cgroup.h| 16 +++
>   include/drm/ttm/ttm_bo_driver.h |  2 ++
>   kernel/cgroup/drm.c | 30 
>   4 files changed, 97 insertions(+)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index d7e3d3128ebb..72efae694b7e 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, 
> unsigned mem_type)
>   }
>   EXPORT_SYMBOL(ttm_bo_evict_mm);
>
> +static void ttm_bo_reclaim_wq(struct work_struct *work)
> +{
> + struct ttm_operation_ctx ctx = {
> + .interruptible = false,
> + .no_wait_gpu = false,
> + .flags = TTM_OPT_FLAG_FORCE_ALLOC
> + };
> + struct ttm_mem_type_manager *man =
> + container_of(work, struct ttm_mem_type_manager, reclaim_wq);
> + struct ttm_bo_device *bdev = man->bdev;
> + struct dma_fence *fence;
> + int mem_type;
> + int ret;
> +
> + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
> + if (&bdev->man[mem_type] == man)
> + break;
> +
> + WARN_ON(mem_type >= TTM_NUM_MEM_TYPES);
> + if (mem_type >= TTM_NUM_MEM_TYPES)
> + return;
> +
> + if (!drmcg_mem_pressure_scan(bdev, mem_type))
> + return;
> +
> + ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx, NULL);
> + if (ret)
> + return;
> +
> + spin_lock(&man->move_lock);
> + fence = dma_fence_get(man->move);
> + spin_unlock(&man->move_lock);
> +
> + if (fence) {
> + ret = dma_fence_wait(fence, false);
> + dma_fence_put(fence);
> + }

Why do you want to block for the fence here? That is a rather bad idea
and would break pipe-lining.

Apart from that I don't think we should put that into TTM.

Instead drmcg_register_device_mm() should get a function pointer which
is called from a work item when the group is under pressure.

TTM can then provides the function which can be called, but the actually
registration is job of the device and not TTM.

Regards,
Christian.

> +
> +}
> +
>   int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
>   unsigned long p_size)
>   {
> @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, 
> unsigned type,
>   INIT_LIST_HEAD(&man->lru[i]);
>   man->move = NULL;
>
> + pr_err("drmcg %p type %d\n", bdev->ddev, type);
> +
> + if (type <= TTM_PL_VRAM) {
> + INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq);
> + drmcg_register_device_mm(bdev->ddev, type, &man->reclaim_wq);
> + }
> +
>   return 0;
>   }
>   EXPORT_SYMBOL(ttm_bo_init_mm);
> @@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
>   man = &bdev->man[i];
>   if (man->has_type) {
>   man->use_type = false;
> + drmcg_unregister_device_mm(bdev->ddev, i);
> + cancel_work_sync(&man->reclaim_wq);
>   if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) {
>   ret = -EBUSY;
>   pr_err("DRM memory manager type %d is not 
> clean\n",
> diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
> index c11df388fdf2..6d9707e1eb72 100644
> --- a/include/drm/drm_cgroup.h
> +++ b/include/drm/drm_cgroup.h
> @@ -5,6 +5,7 @@
>   #define __DRM_CGROUP_H__
>
>   #include 
> +#include 
>   #include 
>   #include 
>
> @@ -25,12 +26,17 @@ struct drmcg_props {
>   s64 mem_bw_avg_bytes_per_us_default;
>
>   s64 mem_highs_default[TTM_PL_PRIV+1];
> +
> + struct work_struct  *mem_reclaim_wq[TTM_PL_PRIV];
>   };
>
>   #ifdef CONFIG_CGROUP_DRM
>
>   void drmcg_devi

Re: [PATCH RFC v4 13/16] drm, cgroup: Allow more aggressive memory reclaim

2019-08-29 Thread Kenny Ho
Yes, and I think it has quite a lot of coupling with mm's page and
pressure mechanisms.  My current thought is to just copy the API but
have a separate implementation of "ttm_shrinker" and
"ttm_shrinker_control" or something like that.  I am certainly happy
to listen to additional feedbacks and suggestions.

Regards,
Kenny


On Thu, Aug 29, 2019 at 10:12 AM Koenig, Christian
 wrote:
>
> Yeah, that's also a really good idea as well.
>
> The problem with the shrinker API is that it only applies to system memory 
> currently.
>
> So you won't have a distinction which domain you need to evict stuff from.
>
> Regards,
> Christian.
>
> Am 29.08.19 um 16:07 schrieb Kenny Ho:
>
> Thanks for the feedback Christian.  I am still digging into this one.  Daniel 
> suggested leveraging the Shrinker API for the functionality of this commit in 
> RFC v3 but I am still trying to figure it out how/if ttm fit with shrinker 
> (though the idea behind the shrinker API seems fairly straightforward as far 
> as I understand it currently.)
>
> Regards,
> Kenny
>
> On Thu, Aug 29, 2019 at 3:08 AM Koenig, Christian  
> wrote:
>>
>> Am 29.08.19 um 08:05 schrieb Kenny Ho:
>> > Allow DRM TTM memory manager to register a work_struct, such that, when
>> > a drmcgrp is under memory pressure, memory reclaiming can be triggered
>> > immediately.
>> >
>> > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
>> > Signed-off-by: Kenny Ho 
>> > ---
>> >   drivers/gpu/drm/ttm/ttm_bo.c| 49 +
>> >   include/drm/drm_cgroup.h| 16 +++
>> >   include/drm/ttm/ttm_bo_driver.h |  2 ++
>> >   kernel/cgroup/drm.c | 30 
>> >   4 files changed, 97 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> > index d7e3d3128ebb..72efae694b7e 100644
>> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> > @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, 
>> > unsigned mem_type)
>> >   }
>> >   EXPORT_SYMBOL(ttm_bo_evict_mm);
>> >
>> > +static void ttm_bo_reclaim_wq(struct work_struct *work)
>> > +{
>> > + struct ttm_operation_ctx ctx = {
>> > + .interruptible = false,
>> > + .no_wait_gpu = false,
>> > + .flags = TTM_OPT_FLAG_FORCE_ALLOC
>> > + };
>> > + struct ttm_mem_type_manager *man =
>> > + container_of(work, struct ttm_mem_type_manager, reclaim_wq);
>> > + struct ttm_bo_device *bdev = man->bdev;
>> > + struct dma_fence *fence;
>> > + int mem_type;
>> > + int ret;
>> > +
>> > + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
>> > + if (&bdev->man[mem_type] == man)
>> > + break;
>> > +
>> > + WARN_ON(mem_type >= TTM_NUM_MEM_TYPES);
>> > + if (mem_type >= TTM_NUM_MEM_TYPES)
>> > + return;
>> > +
>> > + if (!drmcg_mem_pressure_scan(bdev, mem_type))
>> > + return;
>> > +
>> > + ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx, NULL);
>> > + if (ret)
>> > + return;
>> > +
>> > + spin_lock(&man->move_lock);
>> > + fence = dma_fence_get(man->move);
>> > + spin_unlock(&man->move_lock);
>> > +
>> > + if (fence) {
>> > + ret = dma_fence_wait(fence, false);
>> > + dma_fence_put(fence);
>> > + }
>>
>> Why do you want to block for the fence here? That is a rather bad idea
>> and would break pipe-lining.
>>
>> Apart from that I don't think we should put that into TTM.
>>
>> Instead drmcg_register_device_mm() should get a function pointer which
>> is called from a work item when the group is under pressure.
>>
>> TTM can then provides the function which can be called, but the actually
>> registration is job of the device and not TTM.
>>
>> Regards,
>> Christian.
>>
>> > +
>> > +}
>> > +
>> >   int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
>> >   unsigned long p_size)
>> >   {
>> > @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, 
>> > unsigned type,
>> >   INIT_LIST_HEAD(&man->lru[i]);
>> >   man->move = NULL;
>> >
>> > + pr_err("drmcg %p type %d\n", bdev->ddev, type);
>> > +
>> > + if (type <= TTM_PL_VRAM) {
>> > + INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq);
>> > + drmcg_register_device_mm(bdev->ddev, type, &man->reclaim_wq);
>> > + }
>> > +
>> >   return 0;
>> >   }
>> >   EXPORT_SYMBOL(ttm_bo_init_mm);
>> > @@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
>> >   man = &bdev->man[i];
>> >   if (man->has_type) {
>> >   man->use_type = false;
>> > + drmcg_unregister_device_mm(bdev->ddev, i);
>> > + cancel_work_sync(&man->reclaim_wq);
>> >   if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, 
>> > i)) {
>> >