[PATCH v2] drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl

2017-10-04 Thread Boris Brezillon
This ioctl will allow us to purge inactive userspace buffers when the
system is running out of contiguous memory.

For now, the purge logic is rather dumb in that it does not try to
release only the amount of BO needed to meet the last CMA alloc request
but instead purges all objects placed in the purgeable pool as soon as
we experience a CMA allocation failure.

Note that the in-kernel BO cache is always purged before the purgeable
cache because those objects are known to be unused while objects marked
as purgeable by a userspace application/library might have to be
restored when they are marked back as unpurgeable, which can be
expensive.

Signed-off-by: Boris Brezillon 
---
Hello,

Updates to libdrm, mesa and igt making use of this kernel feature can
be found on my github repos [1][2][3].

There's currently no debugfs hook to manually force a purge, but this
is being discussed and might be added later on.

Regards,

Boris

[1]https://github.com/bbrezillon/drm/tree/vc4/purgeable
[2]https://github.com/bbrezillon/mesa/tree/vc4/purgeable
[3]https://github.com/bbrezillon/igt/tree/vc4/purgeable

Changes in v2:
- Do not re-allocate BO's memory after when WILLNEED is asked after a purge
- Add purgeable/purged stats to debugfs and dumpstats
- Pad the drm_vc4_gem_madvise to make it 64-bit aligned
- Forbid madvise calls on internal BO objects (everything that is not
  meant to be exposed to userspace)
- Do not increment/decrement usecnt on internal BOs (they cannot be marked
  purgeable, so doing that is useless and error prone)
- Fix a few bugs related to usecnt refcounting
---
 drivers/gpu/drm/vc4/vc4_bo.c| 284 ++--
 drivers/gpu/drm/vc4/vc4_drv.c   |  10 +-
 drivers/gpu/drm/vc4/vc4_drv.h   |  44 +++
 drivers/gpu/drm/vc4/vc4_gem.c   | 153 +-
 drivers/gpu/drm/vc4/vc4_plane.c |  20 +++
 include/uapi/drm/vc4_drm.h  |  18 +++
 6 files changed, 512 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 3afdbf4bc10b..edb0062a58c7 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -42,7 +42,8 @@ static bool is_user_label(int label)
 
 static void vc4_bo_stats_dump(struct vc4_dev *vc4)
 {
-   int i;
+   size_t purgeable_size, purged_size;
+   int i, npurgeable, npurged;
 
for (i = 0; i < vc4->num_labels; i++) {
if (!vc4->bo_labels[i].num_allocated)
@@ -53,6 +54,21 @@ static void vc4_bo_stats_dump(struct vc4_dev *vc4)
 vc4->bo_labels[i].size_allocated / 1024,
 vc4->bo_labels[i].num_allocated);
}
+
+   mutex_lock(&vc4->purgeable.lock);
+   npurgeable = vc4->purgeable.num;
+   purgeable_size = vc4->purgeable.size;
+   purged_size = vc4->purgeable.purged_size;
+   npurged = vc4->purgeable.purged_num;
+   mutex_unlock(&vc4->purgeable.lock);
+
+   if (npurgeable)
+   DRM_INFO("%30s: %6dkb BOs (%d)\n", "userspace BO cache",
+purgeable_size / 1024, npurgeable);
+
+   if (npurged)
+   DRM_INFO("%30s: %6dkb BOs (%d)\n", "total purged BO",
+purged_size / 1024, npurged);
 }
 
 #ifdef CONFIG_DEBUG_FS
@@ -61,7 +77,8 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct vc4_dev *vc4 = to_vc4_dev(dev);
-   int i;
+   size_t purgeable_size, purged_size;
+   int i, npurgeable, npurged;
 
mutex_lock(&vc4->bo_lock);
for (i = 0; i < vc4->num_labels; i++) {
@@ -75,6 +92,21 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
}
mutex_unlock(&vc4->bo_lock);
 
+   mutex_lock(&vc4->purgeable.lock);
+   npurgeable = vc4->purgeable.num;
+   purgeable_size = vc4->purgeable.size;
+   purged_size = vc4->purgeable.purged_size;
+   npurged = vc4->purgeable.purged_num;
+   mutex_unlock(&vc4->purgeable.lock);
+
+   if (npurgeable)
+   seq_printf(m, "%30s: %6dkb BOs (%d)\n", "userspace BO cache",
+  purgeable_size / 1024, npurgeable);
+
+   if (npurged)
+   seq_printf(m, "%30s: %6dkb BOs (%d)\n", "total purged BO",
+  purged_size / 1024, npurged);
+
return 0;
 }
 #endif
@@ -247,6 +279,104 @@ static void vc4_bo_cache_purge(struct drm_device *dev)
mutex_unlock(&vc4->bo_lock);
 }
 
+void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo)
+{
+   struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+   mutex_lock(&vc4->purgeable.lock);
+   list_add_tail(&bo->size_head, &vc4->purgeable.list);
+   vc4->purgeable.num++;
+   vc4->purgeable.size += bo->base.base.size;
+   mutex_unlock(&vc4->purgeable.lock);
+}
+
+static void vc4_bo_remove_from_purgeable_pool_locked(struct vc4_bo *bo)
+{

Re: [PATCH v2] drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl

2017-10-04 Thread kbuild test robot
Hi Boris,

[auto build test WARNING on v4.14-rc2]
[also build test WARNING on next-20170929]
[cannot apply to anholt/for-next]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Boris-Brezillon/drm-vc4-Add-the-DRM_IOCTL_VC4_GEM_MADVISE-ioctl/20171005-081733
config: x86_64-allmodconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   In file included from include/linux/printk.h:6:0,
from include/linux/kernel.h:13,
from include/asm-generic/bug.h:15,
from arch/x86/include/asm/bug.h:81,
from include/linux/bug.h:4,
from include/linux/scatterlist.h:6,
from include/linux/dma-buf.h:29,
from drivers/gpu//drm/vc4/vc4_bo.c:22:
   drivers/gpu//drm/vc4/vc4_bo.c: In function 'vc4_bo_stats_dump':
   include/linux/kern_levels.h:4:18: warning: format '%d' expects argument of 
type 'int', but argument 3 has type 'size_t {aka long unsigned int}' [-Wformat=]
#define KERN_SOH "\001"  /* ASCII Start Of Header */
 ^
   include/linux/kern_levels.h:13:19: note: in expansion of macro 'KERN_SOH'
#define KERN_INFO KERN_SOH "6" /* informational */
  ^~~~
>> include/drm/drmP.h:150:16: note: in expansion of macro 'KERN_INFO'
  printk##once(KERN_##level "[" DRM_NAME "] " fmt, \
   ^
>> include/drm/drmP.h:155:2: note: in expansion of macro '_DRM_PRINTK'
 _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__)
 ^~~
>> drivers/gpu//drm/vc4/vc4_bo.c:66:3: note: in expansion of macro 'DRM_INFO'
  DRM_INFO("%30s: %6dkb BOs (%d)\n", "userspace BO cache",
  ^~~~
   include/linux/kern_levels.h:4:18: warning: format '%d' expects argument of 
type 'int', but argument 3 has type 'size_t {aka long unsigned int}' [-Wformat=]
#define KERN_SOH "\001"  /* ASCII Start Of Header */
 ^
   include/linux/kern_levels.h:13:19: note: in expansion of macro 'KERN_SOH'
#define KERN_INFO KERN_SOH "6" /* informational */
  ^~~~
>> include/drm/drmP.h:150:16: note: in expansion of macro 'KERN_INFO'
  printk##once(KERN_##level "[" DRM_NAME "] " fmt, \
   ^
>> include/drm/drmP.h:155:2: note: in expansion of macro '_DRM_PRINTK'
 _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__)
 ^~~
   drivers/gpu//drm/vc4/vc4_bo.c:70:3: note: in expansion of macro 'DRM_INFO'
  DRM_INFO("%30s: %6dkb BOs (%d)\n", "total purged BO",
  ^~~~
   drivers/gpu//drm/vc4/vc4_bo.c: In function 'vc4_bo_stats_debugfs':
>> drivers/gpu//drm/vc4/vc4_bo.c:103:26: warning: format '%d' expects argument 
>> of type 'int', but argument 4 has type 'size_t {aka long unsigned int}' 
>> [-Wformat=]
  seq_printf(m, "%30s: %6dkb BOs (%d)\n", "userspace BO cache",
 ^
   drivers/gpu//drm/vc4/vc4_bo.c:107:26: warning: format '%d' expects argument 
of type 'int', but argument 4 has type 'size_t {aka long unsigned int}' 
[-Wformat=]
  seq_printf(m, "%30s: %6dkb BOs (%d)\n", "total purged BO",
 ^
--
   In file included from include/linux/printk.h:6:0,
from include/linux/kernel.h:13,
from include/asm-generic/bug.h:15,
from arch/x86/include/asm/bug.h:81,
from include/linux/bug.h:4,
from include/linux/scatterlist.h:6,
from include/linux/dma-buf.h:29,
from drivers/gpu/drm/vc4/vc4_bo.c:22:
   drivers/gpu/drm/vc4/vc4_bo.c: In function 'vc4_bo_stats_dump':
   include/linux/kern_levels.h:4:18: warning: format '%d' expects argument of 
type 'int', but argument 3 has type 'size_t {aka long unsigned int}' [-Wformat=]
#define KERN_SOH "\001"  /* ASCII Start Of Header */
 ^
   include/linux/kern_levels.h:13:19: note: in expansion of macro 'KERN_SOH'
#define KERN_INFO KERN_SOH "6" /* informational */
  ^~~~
>> include/drm/drmP.h:150:16: note: in expansion of macro 'KERN_INFO'
  printk##once(KERN_##level "[" DRM_NAME "] " fmt, \
   ^
>> include/drm/drmP.h:155:2: note: in expansion of macro '_DRM_PRINTK'
 _DRM_PRINTK(, INFO, fmt, ##__VA_ARGS__)
 ^~~
   drivers/gpu/drm/vc4/vc4_bo.c:66:3: note: in expansion of macro 'DRM_INFO'
  DRM_INFO("%30s: %6dkb BOs (%d)\n", "userspace BO cache",
  ^~~~
   include/linux/kern_levels.h:4:18: warning: format '%d' expects argument of 
type 'int', but argument 3 has type 'size_t {aka long unsigned int}' [-Wformat=]
#define KERN_SOH "\001"  /* ASCII Start Of Header */
 ^
   include/linux/kern_levels.h:13:19: no

Re: [PATCH v2] drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl

2017-10-12 Thread Eric Anholt
Boris Brezillon  writes:

> This ioctl will allow us to purge inactive userspace buffers when the
> system is running out of contiguous memory.
>
> For now, the purge logic is rather dumb in that it does not try to
> release only the amount of BO needed to meet the last CMA alloc request
> but instead purges all objects placed in the purgeable pool as soon as
> we experience a CMA allocation failure.
>
> Note that the in-kernel BO cache is always purged before the purgeable
> cache because those objects are known to be unused while objects marked
> as purgeable by a userspace application/library might have to be
> restored when they are marked back as unpurgeable, which can be
> expensive.
>
> Signed-off-by: Boris Brezillon 
> ---
> Hello,
>
> Updates to libdrm, mesa and igt making use of this kernel feature can
> be found on my github repos [1][2][3].
>
> There's currently no debugfs hook to manually force a purge, but this
> is being discussed and might be added later on.
>
> Regards,
>
> Boris
>
> [1]https://github.com/bbrezillon/drm/tree/vc4/purgeable
> [2]https://github.com/bbrezillon/mesa/tree/vc4/purgeable
> [3]https://github.com/bbrezillon/igt/tree/vc4/purgeable
>
> Changes in v2:
> - Do not re-allocate BO's memory after when WILLNEED is asked after a purge
> - Add purgeable/purged stats to debugfs and dumpstats
> - Pad the drm_vc4_gem_madvise to make it 64-bit aligned
> - Forbid madvise calls on internal BO objects (everything that is not
>   meant to be exposed to userspace)
> - Do not increment/decrement usecnt on internal BOs (they cannot be marked
>   purgeable, so doing that is useless and error prone)
> - Fix a few bugs related to usecnt refcounting
> ---
>  drivers/gpu/drm/vc4/vc4_bo.c| 284 
> ++--
>  drivers/gpu/drm/vc4/vc4_drv.c   |  10 +-
>  drivers/gpu/drm/vc4/vc4_drv.h   |  44 +++
>  drivers/gpu/drm/vc4/vc4_gem.c   | 153 +-
>  drivers/gpu/drm/vc4/vc4_plane.c |  20 +++
>  include/uapi/drm/vc4_drm.h  |  18 +++
>  6 files changed, 512 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
> index 3afdbf4bc10b..edb0062a58c7 100644
> --- a/drivers/gpu/drm/vc4/vc4_bo.c
> +++ b/drivers/gpu/drm/vc4/vc4_bo.c
> @@ -42,7 +42,8 @@ static bool is_user_label(int label)
>  
>  static void vc4_bo_stats_dump(struct vc4_dev *vc4)
>  {
> - int i;
> + size_t purgeable_size, purged_size;
> + int i, npurgeable, npurged;
>  
>   for (i = 0; i < vc4->num_labels; i++) {
>   if (!vc4->bo_labels[i].num_allocated)
> @@ -53,6 +54,21 @@ static void vc4_bo_stats_dump(struct vc4_dev *vc4)
>vc4->bo_labels[i].size_allocated / 1024,
>vc4->bo_labels[i].num_allocated);
>   }
> +
> + mutex_lock(&vc4->purgeable.lock);
> + npurgeable = vc4->purgeable.num;
> + purgeable_size = vc4->purgeable.size;
> + purged_size = vc4->purgeable.purged_size;
> + npurged = vc4->purgeable.purged_num;
> + mutex_unlock(&vc4->purgeable.lock);
> +
> + if (npurgeable)
> + DRM_INFO("%30s: %6dkb BOs (%d)\n", "userspace BO cache",
> +  purgeable_size / 1024, npurgeable);
> +
> + if (npurged)
> + DRM_INFO("%30s: %6dkb BOs (%d)\n", "total purged BO",
> +  purged_size / 1024, npurged);
>  }

We had a discussion about this on IRC -- I was trying to go for using
the debug labeling stuff instead of maintaining separate stats.
Apparently the "total purged ever" stats have been really useful, though
(since a BO is probably removed soon after being purged), so we're
sticking with it.  Also, losing existing debug labels on
GL_APPLE_object_purgeable objects wouldn't be great.

> +static inline bool vc4_bo_supports_madv(const struct vc4_bo *bo)
> +{
> + switch (bo->label) {
> + case VC4_BO_TYPE_V3D:
> + case VC4_BO_TYPE_V3D_SHADER:
> + case VC4_BO_TYPE_DUMB:
> + return true;
> + default:
> + break;
> + }
> +
> + return false;
> +}

This function is a problem -- we're basing usecnt management elsewhere
on its return value, but these labels are supposed to only be debug
information.  Notably, userspace could call the label ioctl with the
same string as one of the kernel names and get the BO to have one of the
kernel's permanent label numbers, mixing up the usecnts.

>  struct vc4_fence {
>   struct dma_fence base;
>   struct drm_device *dev;
> @@ -503,6 +540,7 @@ int vc4_get_hang_state_ioctl(struct drm_device *dev, void 
> *data,
>struct drm_file *file_priv);
>  int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
>  struct drm_file *file_priv);
> +int vc4_fault(struct vm_fault *vmf);
>  int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
>  struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
>  int vc4_prime_mmap(struct drm_gem_ob