Hi, Tiago.

On 08/26/2015 02:02 AM, Tiago Vignatti wrote:
> From: Daniel Vetter <daniel.vet...@ffwll.ch>
>
> The userspace might need some sort of cache coherency management e.g. when CPU
> and GPU domains are being accessed through dma-buf at the same time. To
> circumvent this problem there are begin/end coherency markers, that forward
> directly to existing dma-buf device drivers vfunc hooks. Userspace can make 
> use
> of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence would be
> used like following:
>
>   - mmap dma-buf fd
>   - for each drawing/upload cycle in CPU
>     1. SYNC_START ioctl
>     2. read/write to mmap area or a 2d sub-region of it
>     3. SYNC_END ioctl.
>   - munmap once you don't need the buffer any more
>
> v2 (Tiago): Fix header file type names (u64 -> __u64)
> v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the begin/end
> dma-buf functions. Check for overflows in start/length.
> v4 (Tiago): use 2d regions for sync.

Daniel V had issues with the sync argument proposed by Daniel S. I'm
fine with that argument or an argument containing only a single sync
rect. I'm not sure whether Daniel V will find it easier to accept only a
single sync rect...

Also please see below.

>
> Cc: Sumit Semwal <sumit.sem...@linaro.org>
> Signed-off-by: Daniel Vetter <daniel.vet...@intel.com>
> Signed-off-by: Tiago Vignatti <tiago.vigna...@intel.com>
> ---
>
> I'm unable to test the 2d sync properly, because begin/end access in i915
> don't track the mapped range at all.
>
>  Documentation/dma-buf-sharing.txt      | 13 ++++++
>  drivers/dma-buf/dma-buf.c              | 77 
> ++++++++++++++++++++++++++++------
>  drivers/gpu/drm/i915/i915_gem_dmabuf.c |  6 ++-
>  include/linux/dma-buf.h                | 20 +++++----
>  include/uapi/linux/dma-buf.h           | 57 +++++++++++++++++++++++++
>  5 files changed, 150 insertions(+), 23 deletions(-)
>  create mode 100644 include/uapi/linux/dma-buf.h
>
> diff --git a/Documentation/dma-buf-sharing.txt 
> b/Documentation/dma-buf-sharing.txt
> index 480c8de..8061ac0 100644
> --- a/Documentation/dma-buf-sharing.txt
> +++ b/Documentation/dma-buf-sharing.txt
> @@ -355,6 +355,19 @@ Being able to mmap an export dma-buf buffer object has 2 
> main use-cases:
>  
>     No special interfaces, userspace simply calls mmap on the dma-buf fd.
>  
> +   Also, the userspace might need some sort of cache coherency management 
> e.g.
> +   when CPU and GPU domains are being accessed through dma-buf at the same
> +   time. To circumvent this problem there are begin/end coherency markers, 
> that
> +   forward directly to existing dma-buf device drivers vfunc hooks. Userspace
> +   can make use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The
> +   sequence would be used like following:
> +     - mmap dma-buf fd
> +     - for each drawing/upload cycle in CPU
> +       1. SYNC_START ioctl
> +       2. read/write to mmap area or a 2d sub-region of it
> +       3. SYNC_END ioctl.
> +     - munamp once you don't need the buffer any more

(typo in the quoted hunk: "munamp" should read "munmap")
> +
>  2. Supporting existing mmap interfaces in importers
>  
>     Similar to the motivation for kernel cpu access it is again important that
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 155c146..b6a4a06 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -251,11 +251,55 @@ out:
>       return events;
>  }
>  
> +static long dma_buf_ioctl(struct file *file,
> +                       unsigned int cmd, unsigned long arg)
> +{
> +     struct dma_buf *dmabuf;
> +     struct dma_buf_sync sync;
> +     enum dma_data_direction direction;
> +
> +     dmabuf = file->private_data;
> +
> +     if (!is_dma_buf_file(file))
> +             return -EINVAL;
> +
> +     if (cmd != DMA_BUF_IOCTL_SYNC)
> +             return -ENOTTY;
> +
> +     if (copy_from_user(&sync, (void __user *) arg, sizeof(sync)))
> +             return -EFAULT;

Do we actually copy all sync regions here?

> +
> +     if (sync.flags & DMA_BUF_SYNC_RW)
> +             direction = DMA_BIDIRECTIONAL;
> +     else if (sync.flags & DMA_BUF_SYNC_READ)
> +             direction = DMA_FROM_DEVICE;
> +     else if (sync.flags & DMA_BUF_SYNC_WRITE)
> +             direction = DMA_TO_DEVICE;
> +     else
> +             return -EINVAL;
> +
> +     if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK)
> +             return -EINVAL;
> +
> +     /* TODO: check for overflowing the buffer's size - how so, checking 
> region by
> +      * region here? Maybe need to check for the other parameters as well. */
> +
> +     if (sync.flags & DMA_BUF_SYNC_END)
> +             dma_buf_end_cpu_access(dmabuf, sync.stride_bytes, 
> sync.bytes_per_pixel,
> +                             sync.num_regions, sync.regions, direction);
> +     else
> +             dma_buf_begin_cpu_access(dmabuf, sync.stride_bytes, 
> sync.bytes_per_pixel,
> +                             sync.num_regions, sync.regions, direction);
> +
> +     return 0;
> +}
> +
>  static const struct file_operations dma_buf_fops = {
>       .release        = dma_buf_release,
>       .mmap           = dma_buf_mmap_internal,
>       .llseek         = dma_buf_llseek,
>       .poll           = dma_buf_poll,
> +     .unlocked_ioctl = dma_buf_ioctl,
>  };
>  
>  /*
> @@ -539,14 +583,17 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
>   * preparations. Coherency is only guaranteed in the specified range for the
>   * specified access direction.
>   * @dmabuf:  [in]    buffer to prepare cpu access for.
> - * @start:   [in]    start of range for cpu access.
> - * @len:     [in]    length of range for cpu access.
> - * @direction:       [in]    length of range for cpu access.
> + * @stride_bytes:    [in]    stride in bytes for cpu access.
> + * @bytes_per_pixel: [in]    bytes per pixel of the region for cpu access.
> + * @num_regions:   [in]  number of regions.
> + * @region:   [in] vector of 2-dimensional regions for cpu access.
> + * @direction:       [in]    direction of range for cpu access.
>   *
>   * Can return negative error values, returns 0 on success.
>   */
> -int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t 
> len,
> -                          enum dma_data_direction direction)
> +int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes,
> +     size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region 
> regions[],
> +     enum dma_data_direction direction)
>  {
>       int ret = 0;
>  
> @@ -554,8 +601,8 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, 
> size_t start, size_t len,
>               return -EINVAL;
>  
>       if (dmabuf->ops->begin_cpu_access)
> -             ret = dmabuf->ops->begin_cpu_access(dmabuf, start,
> -                                                     len, direction);
> +             ret = dmabuf->ops->begin_cpu_access(dmabuf, stride_bytes, 
> bytes_per_pixel,
> +                                                     num_regions, regions, 
> direction);
>  
>       return ret;
>  }
> @@ -567,19 +614,23 @@ EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access);
>   * actions. Coherency is only guaranteed in the specified range for the
>   * specified access direction.
>   * @dmabuf:  [in]    buffer to complete cpu access for.
> - * @start:   [in]    start of range for cpu access.
> - * @len:     [in]    length of range for cpu access.
> - * @direction:       [in]    length of range for cpu access.
> + * @stride_bytes:    [in]    stride in bytes for cpu access.
> + * @bytes_per_pixel: [in]    bytes per pixel of the region for cpu access.
> + * @num_regions:   [in]  number of regions.
> + * @regions:   [in]  vector of 2-dimensional regions for cpu access.
> + * @direction:       [in]    direction of range for cpu access.
>   *
>   * This call must always succeed.
>   */
> -void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
> -                         enum dma_data_direction direction)
> +void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t stride_bytes,
> +     size_t bytes_per_pixel, size_t num_regions, struct dma_buf_sync_region 
> regions[],
> +     enum dma_data_direction direction)
>  {
>       WARN_ON(!dmabuf);
>  
>       if (dmabuf->ops->end_cpu_access)
> -             dmabuf->ops->end_cpu_access(dmabuf, start, len, direction);
> +             dmabuf->ops->end_cpu_access(dmabuf, stride_bytes, 
> bytes_per_pixel,
> +                     num_regions, regions, direction);
>  }
>  EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c 
> b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index 95cbfff..e5bb7a3 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -212,7 +212,8 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, 
> struct vm_area_struct *
>       return 0;
>  }
>  
> -static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, 
> size_t length, enum dma_data_direction direction)
> +static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t 
> stride_bytes, size_t bytes_per_pixel,
> +             size_t num_regions, struct dma_buf_sync_region regions[], enum 
> dma_data_direction direction)
>  {
>       struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
>       struct drm_device *dev = obj->base.dev;
> @@ -228,7 +229,8 @@ static int i915_gem_begin_cpu_access(struct dma_buf 
> *dma_buf, size_t start, size
>       return ret;
>  }
>  
> -static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t start, 
> size_t length, enum dma_data_direction direction)
> +static void i915_gem_end_cpu_access(struct dma_buf *dma_buf, size_t 
> stride_bytes, size_t bytes_per_pixel,
> +             size_t num_regions, struct dma_buf_sync_region regions[], enum 
> dma_data_direction direction)
>  {
>       struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
>       struct drm_device *dev = obj->base.dev;
> diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
> index f98bd70..ed457cb 100644
> --- a/include/linux/dma-buf.h
> +++ b/include/linux/dma-buf.h
> @@ -33,6 +33,8 @@
>  #include <linux/fence.h>
>  #include <linux/wait.h>
>  
> +#include <uapi/linux/dma-buf.h>
> +
>  struct device;
>  struct dma_buf;
>  struct dma_buf_attachment;
> @@ -93,10 +95,10 @@ struct dma_buf_ops {
>       /* after final dma_buf_put() */
>       void (*release)(struct dma_buf *);
>  
> -     int (*begin_cpu_access)(struct dma_buf *, size_t, size_t,
> -                             enum dma_data_direction);
> -     void (*end_cpu_access)(struct dma_buf *, size_t, size_t,
> -                            enum dma_data_direction);
> +     int (*begin_cpu_access)(struct dma_buf *, size_t, size_t, size_t,
> +                             struct dma_buf_sync_region [], enum 
> dma_data_direction);
> +     void (*end_cpu_access)(struct dma_buf *, size_t, size_t, size_t,
> +                            struct dma_buf_sync_region [], enum 
> dma_data_direction);
>       void *(*kmap_atomic)(struct dma_buf *, unsigned long);
>       void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *);
>       void *(*kmap)(struct dma_buf *, unsigned long);
> @@ -224,10 +226,12 @@ struct sg_table *dma_buf_map_attachment(struct 
> dma_buf_attachment *,
>                                       enum dma_data_direction);
>  void dma_buf_unmap_attachment(struct dma_buf_attachment *, struct sg_table *,
>                               enum dma_data_direction);
> -int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t 
> len,
> -                          enum dma_data_direction dir);
> -void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t start, size_t 
> len,
> -                         enum dma_data_direction dir);
> +int dma_buf_begin_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes,
> +                          size_t bytes_per_pixel, size_t num_regions,
> +                          struct dma_buf_sync_region regions[], enum 
> dma_data_direction dir);
> +void dma_buf_end_cpu_access(struct dma_buf *dma_buf, size_t stride_bytes,
> +                          size_t bytes_per_pixel, size_t num_regions,
> +                          struct dma_buf_sync_region regions[], enum 
> dma_data_direction dir);
>  void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long);
>  void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *);
>  void *dma_buf_kmap(struct dma_buf *, unsigned long);
> diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
> new file mode 100644
> index 0000000..c63b578
> --- /dev/null
> +++ b/include/uapi/linux/dma-buf.h
> @@ -0,0 +1,57 @@
> +/*
> + * Framework for buffer objects that can be shared across devices/subsystems.
> + *
> + * Copyright(C) 2015 Intel Ltd
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published 
> by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but 
> WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along 
> with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef _DMA_BUF_UAPI_H_
> +#define _DMA_BUF_UAPI_H_
> +
> +enum dma_buf_sync_flags {
> +     DMA_BUF_SYNC_READ = (1 << 0),
> +     DMA_BUF_SYNC_WRITE = (2 << 0),
> +     DMA_BUF_SYNC_RW = (3 << 0),
> +     DMA_BUF_SYNC_START = (0 << 2),
> +     DMA_BUF_SYNC_END = (1 << 2),
> +
> +     DMA_BUF_SYNC_VALID_FLAGS_MASK = DMA_BUF_SYNC_RW |
> +             DMA_BUF_SYNC_END
> +};
> +
> +/* 2-dimensional region, used for multi-range flush. This can be used to
> + * synchronize the CPU by batching several sub-regions, smaller than the
> + * mapped dma-buf, all at once. */
> +struct dma_buf_sync_region {
> +     __u64 x;
> +     __u64 y;
> +     __u64 width;
> +     __u64 height;
> +};
> +
> +/* begin/end dma-buf functions used for userspace mmap. */
> +struct dma_buf_sync {
> +     enum dma_buf_sync_flags flags;
> +
> +     __u64 stride_bytes;
> +     __u32 bytes_per_pixel;
> +     __u32 num_regions;
> +
> +     struct dma_buf_sync_region regions[];
> +};
> +
> +#define DMA_BUF_BASE         'b'
> +#define DMA_BUF_IOCTL_SYNC   _IOWR(DMA_BUF_BASE, 0, struct dma_buf_sync)

Should this be _IOW( ? The sync argument is only copied from userspace to
the kernel; nothing is written back, so _IOWR over-declares the transfer
direction.


> +
> +#endif

Thomas
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to