[PATCH v3] media: staging: tegra-vde: Replace debug messages with trace points

2018-11-19 Thread Dmitry Osipenko
Trace points are much more efficient than debug messages for intensive
tracing and can be conveniently enabled / disabled dynamically, hence
let's replace the debug messages with trace points. This also makes
the code a bit cleaner.

Signed-off-by: Dmitry Osipenko 
---

Changelog:

v3: - Basically a RE-SEND of v2 with some very minor code reshuffling.

v2:
- Use __assign_str() for copying the HW sub-engine name during
  tracing. There are no functional changes since v1; this is just a
  slightly better variant of the patch that doesn't rely on stopping
  tracing before releasing the managed resources (struct tegra_vde).

 drivers/staging/media/tegra-vde/tegra-vde.c | 222 +++-
 drivers/staging/media/tegra-vde/trace.h |  93 
 2 files changed, 220 insertions(+), 95 deletions(-)
 create mode 100644 drivers/staging/media/tegra-vde/trace.h

diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
b/drivers/staging/media/tegra-vde/tegra-vde.c
index 66cf14212c14..aa6c6bba961e 100644
--- a/drivers/staging/media/tegra-vde/tegra-vde.c
+++ b/drivers/staging/media/tegra-vde/tegra-vde.c
@@ -35,14 +35,6 @@
 #define BSE_ICMDQUE_EMPTY  BIT(3)
 #define BSE_DMA_BUSY   BIT(23)
 
-#define VDE_WR(__data, __addr) \
-do {   \
-   dev_dbg(vde->miscdev.parent,\
-   "%s: %d: 0x%08X => " #__addr ")\n", \
-   __func__, __LINE__, (u32)(__data)); \
-   writel_relaxed(__data, __addr); \
-} while (0)
-
 struct video_frame {
struct dma_buf_attachment *y_dmabuf_attachment;
struct dma_buf_attachment *cb_dmabuf_attachment;
@@ -81,12 +73,66 @@ struct tegra_vde {
u32 *iram;
 };
 
+static __maybe_unused char const *
+tegra_vde_reg_base_name(struct tegra_vde *vde, void __iomem *base)
+{
+   if (vde->sxe == base)
+   return "SXE";
+
+   if (vde->bsev == base)
+   return "BSEV";
+
+   if (vde->mbe == base)
+   return "MBE";
+
+   if (vde->ppe == base)
+   return "PPE";
+
+   if (vde->mce == base)
+   return "MCE";
+
+   if (vde->tfe == base)
+   return "TFE";
+
+   if (vde->ppb == base)
+   return "PPB";
+
+   if (vde->vdma == base)
+   return "VDMA";
+
+   if (vde->frameid == base)
+   return "FRAMEID";
+
+   return "???";
+}
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+static void tegra_vde_writel(struct tegra_vde *vde,
+u32 value, void __iomem *base, u32 offset)
+{
+   trace_vde_writel(vde, base, offset, value);
+
+   writel_relaxed(value, base + offset);
+}
+
+static u32 tegra_vde_readl(struct tegra_vde *vde,
+  void __iomem *base, u32 offset)
+{
+   u32 value = readl_relaxed(base + offset);
+
+   trace_vde_readl(vde, base, offset, value);
+
+   return value;
+}
+
 static void tegra_vde_set_bits(struct tegra_vde *vde,
-  u32 mask, void __iomem *regs)
+  u32 mask, void __iomem *base, u32 offset)
 {
-   u32 value = readl_relaxed(regs);
+   u32 value = tegra_vde_readl(vde, base, offset);
 
-   VDE_WR(value | mask, regs);
+   tegra_vde_writel(vde, value | mask, base, offset);
 }
 
 static int tegra_vde_wait_mbe(struct tegra_vde *vde)
@@ -107,8 +153,8 @@ static int tegra_vde_setup_mbe_frame_idx(struct tegra_vde 
*vde,
unsigned int idx;
int err;
 
-   VDE_WR(0xD000 | (0 << 23), vde->mbe + 0x80);
-   VDE_WR(0xD020 | (0 << 23), vde->mbe + 0x80);
+   tegra_vde_writel(vde, 0xD000 | (0 << 23), vde->mbe, 0x80);
+   tegra_vde_writel(vde, 0xD020 | (0 << 23), vde->mbe, 0x80);
 
err = tegra_vde_wait_mbe(vde);
if (err)
@@ -118,8 +164,10 @@ static int tegra_vde_setup_mbe_frame_idx(struct tegra_vde 
*vde,
return 0;
 
for (idx = 0, frame_idx = 1; idx < refs_nb; idx++, frame_idx++) {
-   VDE_WR(0xD000 | (frame_idx << 23), vde->mbe + 0x80);
-   VDE_WR(0xD020 | (frame_idx << 23), vde->mbe + 0x80);
+   tegra_vde_writel(vde, 0xD000 | (frame_idx << 23),
+vde->mbe, 0x80);
+   tegra_vde_writel(vde, 0xD020 | (frame_idx << 23),
+vde->mbe, 0x80);
 
frame_idx_enb_mask |= frame_idx << (6 * (idx % 4));
 
@@ -128,7 +176,7 @@ static int tegra_vde_setup_mbe_frame_idx(struct tegra_vde 
*vde,
value |= (idx >> 2) << 24;

Re: [PATCH 08/14] staging: media: tegra-vde: Track struct device *

2018-08-18 Thread Dmitry Osipenko
On 13.08.2018 17:50, Thierry Reding wrote:
> From: Thierry Reding 
> 
> The pointer to the struct device is frequently used, so store it in
> struct tegra_vde. Also, pass around a pointer to a struct tegra_vde
> instead of struct device in some cases to prepare for subsequent
> patches referencing additional data from that structure.
> 
> Signed-off-by: Thierry Reding 
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 63 -
>  1 file changed, 36 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
> b/drivers/staging/media/tegra-vde/tegra-vde.c
> index 41cf86dc5dbd..2496a03fd158 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -71,6 +71,7 @@ struct tegra_vde_soc {
>  };
>  
>  struct tegra_vde {
> + struct device *dev;
>   const struct tegra_vde_soc *soc;
>   void __iomem *sxe;
>   void __iomem *bsev;
> @@ -644,7 +645,7 @@ static void tegra_vde_detach_and_put_dmabuf(struct 
> dma_buf_attachment *a,
>   dma_buf_put(dmabuf);
>  }
>  
> -static int tegra_vde_attach_dmabuf(struct device *dev,
> +static int tegra_vde_attach_dmabuf(struct tegra_vde *vde,
>  int fd,
>  unsigned long offset,
>  size_t min_size,
> @@ -662,38 +663,40 @@ static int tegra_vde_attach_dmabuf(struct device *dev,
>  
>   dmabuf = dma_buf_get(fd);
>   if (IS_ERR(dmabuf)) {
> - dev_err(dev, "Invalid dmabuf FD: %d\n", fd);
> + dev_err(vde->dev, "Invalid dmabuf FD: %d\n", fd);
>   return PTR_ERR(dmabuf);
>   }
>  
>   if (dmabuf->size & (align_size - 1)) {
> - dev_err(dev, "Unaligned dmabuf 0x%zX, should be aligned to 
> 0x%zX\n",
> + dev_err(vde->dev,
> + "Unaligned dmabuf 0x%zX, should be aligned to 0x%zX\n",
>   dmabuf->size, align_size);
>   return -EINVAL;
>   }
>  
>   if ((u64)offset + min_size > dmabuf->size) {
> - dev_err(dev, "Too small dmabuf size %zu @0x%lX, should be at 
> least %zu\n",
> + dev_err(vde->dev,
> + "Too small dmabuf size %zu @0x%lX, should be at least 
> %zu\n",
>   dmabuf->size, offset, min_size);
>   return -EINVAL;
>   }
>  
> - attachment = dma_buf_attach(dmabuf, dev);
> + attachment = dma_buf_attach(dmabuf, vde->dev);
>   if (IS_ERR(attachment)) {
> - dev_err(dev, "Failed to attach dmabuf\n");
> + dev_err(vde->dev, "Failed to attach dmabuf\n");
>   err = PTR_ERR(attachment);
>   goto err_put;
>   }
>  
>   sgt = dma_buf_map_attachment(attachment, dma_dir);
>   if (IS_ERR(sgt)) {
> - dev_err(dev, "Failed to get dmabufs sg_table\n");
> + dev_err(vde->dev, "Failed to get dmabufs sg_table\n");
>   err = PTR_ERR(sgt);
>   goto err_detach;
>   }
>  
>   if (sgt->nents != 1) {
> - dev_err(dev, "Sparse DMA region is unsupported\n");
> + dev_err(vde->dev, "Sparse DMA region is unsupported\n");
>   err = -EINVAL;
>   goto err_unmap;
>   }
> @@ -717,7 +720,7 @@ static int tegra_vde_attach_dmabuf(struct device *dev,
>   return err;
>  }
>  
> -static int tegra_vde_attach_dmabufs_to_frame(struct device *dev,
> +static int tegra_vde_attach_dmabufs_to_frame(struct tegra_vde *vde,
>struct video_frame *frame,
>struct tegra_vde_h264_frame *src,
>enum dma_data_direction dma_dir,
> @@ -726,7 +729,7 @@ static int tegra_vde_attach_dmabufs_to_frame(struct 
> device *dev,
>  {
>   int err;
>  
> - err = tegra_vde_attach_dmabuf(dev, src->y_fd,
> + err = tegra_vde_attach_dmabuf(vde, src->y_fd,
> src->y_offset, lsize, SZ_256,
> >y_dmabuf_attachment,
> >y_addr,
> @@ -735,7 +738,7 @@ static int tegra_vde_attach_dmabufs_to_frame(struct 
> device *dev,
>   if (err)
>   return err;
>  
> - err = tegra_vde_attach_dmabuf(dev, src->cb_fd,
> + err = tegra_vde_attach_dmabuf(vde, src->cb_fd,
> src->cb_offset, csize, SZ_256,
> >cb_dmabuf_attachment,
> >cb_addr,
> @@ -744,7 +747,7 @@ static int tegra_vde_attach_dmabufs_to_frame(struct 
> device *dev,
>   if (err)
>   goto err_release_y;
>  
> - err = tegra_vde_attach_dmabuf(dev, src->cr_fd,
> + err = tegra_vde_attach_dmabuf(vde, src->cr_fd,
> src->cr_offset, csize, SZ_256,
> 

Re: [PATCH 09/14] staging: media: tegra-vde: Add IOMMU support

2018-08-18 Thread Dmitry Osipenko
On 13.08.2018 17:50, Thierry Reding wrote:
> From: Thierry Reding 
> 
> Implement support for using an IOMMU to map physically discontiguous
> buffers into contiguous I/O virtual mappings that the VDE can use. This
> allows importing arbitrary DMA-BUFs for use by the VDE.
> 
> While at it, make sure that the device is detached from any DMA/IOMMU
> mapping that it might have automatically been attached to at boot. If
> using the IOMMU API explicitly, detaching from any existing mapping is
> required to avoid double mapping of buffers.
> 
> Signed-off-by: Thierry Reding 
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 171 +---
>  1 file changed, 153 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
> b/drivers/staging/media/tegra-vde/tegra-vde.c
> index 2496a03fd158..3bc0bfcfe34e 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -13,7 +13,9 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -22,6 +24,10 @@
>  #include 
>  #include 
>  
> +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
> +#include 
> +#endif
> +
>  #include 
>  
>  #include 
> @@ -61,6 +67,11 @@ struct video_frame {
>   u32 frame_num;
>   u32 flags;
>   u64 modifier;
> +
> + struct iova *y_iova;
> + struct iova *cb_iova;
> + struct iova *cr_iova;
> + struct iova *aux_iova;
>  };
>  
>  struct tegra_vde_soc {
> @@ -93,6 +104,12 @@ struct tegra_vde {
>   struct clk *clk_bsev;
>   dma_addr_t iram_lists_addr;
>   u32 *iram;
> +
> + struct iommu_domain *domain;
> + struct iommu_group *group;
> + struct iova_domain iova;
> + unsigned long limit;
> + unsigned int shift;
>  };
>  
>  static void tegra_vde_set_bits(struct tegra_vde *vde,
> @@ -634,12 +651,22 @@ static void tegra_vde_decode_frame(struct tegra_vde 
> *vde,
>   VDE_WR(0x2000 | (macroblocks_nb - 1), vde->sxe + 0x00);
>  }
>  
> -static void tegra_vde_detach_and_put_dmabuf(struct dma_buf_attachment *a,
> +static void tegra_vde_detach_and_put_dmabuf(struct tegra_vde *vde,
> + struct dma_buf_attachment *a,
>   struct sg_table *sgt,
> + struct iova *iova,
>   enum dma_data_direction dma_dir)
>  {
>   struct dma_buf *dmabuf = a->dmabuf;
>  
> + if (vde->domain) {
> + unsigned long size = iova_size(iova) << vde->shift;
> + dma_addr_t addr = iova_dma_addr(>iova, iova);
> +
> + iommu_unmap(vde->domain, addr, size);
> + __free_iova(>iova, iova);
> + }
> +
>   dma_buf_unmap_attachment(a, sgt, dma_dir);
>   dma_buf_detach(dmabuf, a);
>   dma_buf_put(dmabuf);
> @@ -651,14 +678,16 @@ static int tegra_vde_attach_dmabuf(struct tegra_vde 
> *vde,
>  size_t min_size,
>  size_t align_size,
>  struct dma_buf_attachment **a,
> -dma_addr_t *addr,
> +dma_addr_t *addrp,
>  struct sg_table **s,
> -size_t *size,
> +struct iova **iovap,
> +size_t *sizep,
>  enum dma_data_direction dma_dir)
>  {
>   struct dma_buf_attachment *attachment;
>   struct dma_buf *dmabuf;
>   struct sg_table *sgt;
> + size_t size;
>   int err;
>  
>   dmabuf = dma_buf_get(fd);
> @@ -695,18 +724,47 @@ static int tegra_vde_attach_dmabuf(struct tegra_vde 
> *vde,
>   goto err_detach;
>   }
>  
> - if (sgt->nents != 1) {
> + if (sgt->nents > 1 && !vde->domain) {
>   dev_err(vde->dev, "Sparse DMA region is unsupported\n");
>   err = -EINVAL;
>   goto err_unmap;
>   }
>  
> - *addr = sg_dma_address(sgt->sgl) + offset;
> + if (vde->domain) {
> + int prot = IOMMU_READ | IOMMU_WRITE;
> + struct iova *iova;
> + dma_addr_t addr;
> +
> + size = (dmabuf->size - offset) >> vde->shift;
> +
> + iova = alloc_iova(>iova, size, vde->limit - 1, true);
> + if (!iova) {
> + err = -ENOMEM;
> + goto err_unmap;
> + }
> +
> + addr = iova_dma_addr(>iova, iova);
> +
> + size = iommu_map_sg(vde->domain, addr, sgt->sgl, sgt->nents,
> + prot);
> + if (!size) {
> + __free_iova(>iova, iova);
> + err = -ENXIO;
> + goto err_unmap;
> + }
> +
> + *addrp = addr;
> + *iovap = iova;
> + } 

Re: [PATCH 09/14] staging: media: tegra-vde: Add IOMMU support

2018-08-18 Thread Dmitry Osipenko
On 13.08.2018 17:50, Thierry Reding wrote:
> From: Thierry Reding 
> 
> Implement support for using an IOMMU to map physically discontiguous
> buffers into contiguous I/O virtual mappings that the VDE can use. This
> allows importing arbitrary DMA-BUFs for use by the VDE.
> 
> While at it, make sure that the device is detached from any DMA/IOMMU
> mapping that it might have automatically been attached to at boot. If
> using the IOMMU API explicitly, detaching from any existing mapping is
> required to avoid double mapping of buffers.
> 
> Signed-off-by: Thierry Reding 
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 171 +---
>  1 file changed, 153 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
> b/drivers/staging/media/tegra-vde/tegra-vde.c
> index 2496a03fd158..3bc0bfcfe34e 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -13,7 +13,9 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -22,6 +24,10 @@
>  #include 
>  #include 
>  
> +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
> +#include 
> +#endif
> +
>  #include 
>  
>  #include 
> @@ -61,6 +67,11 @@ struct video_frame {
>   u32 frame_num;
>   u32 flags;
>   u64 modifier;
> +
> + struct iova *y_iova;
> + struct iova *cb_iova;
> + struct iova *cr_iova;
> + struct iova *aux_iova;
>  };
>  
>  struct tegra_vde_soc {
> @@ -93,6 +104,12 @@ struct tegra_vde {
>   struct clk *clk_bsev;
>   dma_addr_t iram_lists_addr;
>   u32 *iram;
> +
> + struct iommu_domain *domain;
> + struct iommu_group *group;
> + struct iova_domain iova;
> + unsigned long limit;
> + unsigned int shift;
>  };
>  
>  static void tegra_vde_set_bits(struct tegra_vde *vde,
> @@ -634,12 +651,22 @@ static void tegra_vde_decode_frame(struct tegra_vde 
> *vde,
>   VDE_WR(0x2000 | (macroblocks_nb - 1), vde->sxe + 0x00);
>  }
>  
> -static void tegra_vde_detach_and_put_dmabuf(struct dma_buf_attachment *a,
> +static void tegra_vde_detach_and_put_dmabuf(struct tegra_vde *vde,
> + struct dma_buf_attachment *a,
>   struct sg_table *sgt,
> + struct iova *iova,
>   enum dma_data_direction dma_dir)
>  {
>   struct dma_buf *dmabuf = a->dmabuf;
>  
> + if (vde->domain) {
> + unsigned long size = iova_size(iova) << vde->shift;
> + dma_addr_t addr = iova_dma_addr(>iova, iova);
> +
> + iommu_unmap(vde->domain, addr, size);
> + __free_iova(>iova, iova);
> + }
> +
>   dma_buf_unmap_attachment(a, sgt, dma_dir);
>   dma_buf_detach(dmabuf, a);
>   dma_buf_put(dmabuf);
> @@ -651,14 +678,16 @@ static int tegra_vde_attach_dmabuf(struct tegra_vde 
> *vde,
>  size_t min_size,
>  size_t align_size,
>  struct dma_buf_attachment **a,
> -dma_addr_t *addr,
> +dma_addr_t *addrp,
>  struct sg_table **s,
> -size_t *size,
> +struct iova **iovap,
> +size_t *sizep,
>  enum dma_data_direction dma_dir)
>  {
>   struct dma_buf_attachment *attachment;
>   struct dma_buf *dmabuf;
>   struct sg_table *sgt;
> + size_t size;
>   int err;
>  
>   dmabuf = dma_buf_get(fd);
> @@ -695,18 +724,47 @@ static int tegra_vde_attach_dmabuf(struct tegra_vde 
> *vde,
>   goto err_detach;
>   }
>  
> - if (sgt->nents != 1) {
> + if (sgt->nents > 1 && !vde->domain) {
>   dev_err(vde->dev, "Sparse DMA region is unsupported\n");
>   err = -EINVAL;
>   goto err_unmap;
>   }
>  
> - *addr = sg_dma_address(sgt->sgl) + offset;
> + if (vde->domain) {
> + int prot = IOMMU_READ | IOMMU_WRITE;
> + struct iova *iova;
> + dma_addr_t addr;
> +
> + size = (dmabuf->size - offset) >> vde->shift;
> +
> + iova = alloc_iova(>iova, size, vde->limit - 1, true);
> + if (!iova) {
> + err = -ENOMEM;
> + goto err_unmap;
> + }
> +
> + addr = iova_dma_addr(>iova, iova);
> +
> + size = iommu_map_sg(vde->domain, addr, sgt->sgl, sgt->nents,
> + prot);
> + if (!size) {
> + __free_iova(>iova, iova);
> + err = -ENXIO;
> + goto err_unmap;
> + }
> +
> + *addrp = addr;
> + *iovap = iova;
> + } 

Re: [PATCH 04/14] staging: media: tegra-vde: Use DRM/KMS framebuffer modifiers

2018-08-18 Thread Dmitry Osipenko
On Monday, 13 August 2018 17:50:17 MSK Thierry Reding wrote:
> From: Thierry Reding 
> 
> VDE on Tegra20 through Tegra114 supports reading and writing frames in
> 16x16 tiled layout. Similarily, the various block-linear layouts that
> are supported by the GPU on Tegra124 can also be read from and written
> to by the Tegra124 VDE.
> 
> Enable userspace to specify the desired layout using the existing DRM
> framebuffer modifiers.
> 
> Signed-off-by: Thierry Reding 
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 112 +---
>  drivers/staging/media/tegra-vde/uapi.h  |   3 +-
>  2 files changed, 100 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c
> b/drivers/staging/media/tegra-vde/tegra-vde.c index
> 1a40f6dff7c8..275884e745df 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -24,6 +24,8 @@
> 
>  #include 
> 
> +#include 
> +
>  #include "uapi.h"
> 
>  #define ICMDQUE_WR   0x00
> @@ -58,12 +60,14 @@ struct video_frame {
>   dma_addr_t aux_addr;
>   u32 frame_num;
>   u32 flags;
> + u64 modifier;
>  };
> 
>  struct tegra_vde_soc {
>   unsigned int num_ref_pics;
>   bool supports_ref_pic_marking;
>   bool supports_interlacing;
> + bool supports_block_linear;
>  };
> 
>  struct tegra_vde {
> @@ -202,6 +206,7 @@ static void tegra_vde_setup_frameid(struct tegra_vde
> *vde, unsigned int frameid,
>   u32 mbs_width, u32 mbs_height)
>  {
> + u64 modifier = frame ? frame->modifier : DRM_FORMAT_MOD_LINEAR;
>   u32 y_addr  = frame ? frame->y_addr  : 0x6CDEAD00;
>   u32 cb_addr = frame ? frame->cb_addr : 0x6CDEAD00;
>   u32 cr_addr = frame ? frame->cr_addr : 0x6CDEAD00;
> @@ -209,8 +214,12 @@ static void tegra_vde_setup_frameid(struct tegra_vde
> *vde, u32 value2 = frame ? mbs_width + 1) >> 1) << 6) | 1) : 0;
>   u32 value = y_addr >> 8;
> 
> - if (vde->soc->supports_interlacing)
> + if (!vde->soc->supports_interlacing) {
> + if (modifier == DRM_FORMAT_MOD_NVIDIA_TEGRA_TILED)
> + value |= BIT(31);
> + } else {
>   value |= BIT(31);
> + }
> 
>   VDE_WR(value,vde->frameid + 0x000 + frameid * 4);
>   VDE_WR(cb_addr >> 8, vde->frameid + 0x100 + frameid * 4);
> @@ -349,6 +358,37 @@ static void tegra_vde_setup_iram_tables(struct
> tegra_vde *vde, }
>  }
> 
> +static int tegra_vde_get_block_height(u64 modifier, unsigned int
> *block_height) +{
> + switch (modifier) {
> + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB:
> + *block_height = 0;
> + return 0;
> +
> + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_TWO_GOB:
> + *block_height = 1;
> + return 0;
> +
> + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_FOUR_GOB:
> + *block_height = 2;
> + return 0;
> +
> + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_EIGHT_GOB:
> + *block_height = 3;
> + return 0;
> +
> + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_SIXTEEN_GOB:
> + *block_height = 4;
> + return 0;
> +
> + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB:
> + *block_height = 5;
> + return 0;
> + }
> +
> + return -EINVAL;
> +}
> +
>  static int tegra_vde_setup_hw_context(struct tegra_vde *vde,
> struct tegra_vde_h264_decoder_ctx *ctx,
> struct video_frame *dpb_frames,
> @@ -383,7 +423,21 @@ static int tegra_vde_setup_hw_context(struct tegra_vde
> *vde, tegra_vde_set_bits(vde, 0x0005, vde->vdma + 0x04);
> 
>   VDE_WR(0x, vde->vdma + 0x1C);
> - VDE_WR(0x, vde->vdma + 0x00);
> +
> + value = 0x;
> +
> + if (vde->soc->supports_block_linear) {
> + unsigned int block_height;
> +
> + err = tegra_vde_get_block_height(dpb_frames[0].modifier,
> +  _height);
> + if (err < 0)
> + return err;
> +
> + value |= block_height << 10;
> + }
> +
> + VDE_WR(value, vde->vdma + 0x00);
>   VDE_WR(0x0007, vde->vdma + 0x04);
>   VDE_WR(0x0007, vde->frameid + 0x200);
>   VDE_WR(0x0005, vde->tfe + 0x04);
> @@ -730,11 +784,37 @@ static void tegra_vde_release_frame_dmabufs(struct
> video_frame *frame, static int tegra_vde_validate_frame(struct device *dev,
>   struct tegra_vde_h264_frame *frame)
>  {
> + struct tegra_vde *vde = dev_get_drvdata(dev);
> +
>   if (frame->frame_num > 0x7F) {
>   dev_err(dev, "Bad frame_num %u\n", frame->frame_num);
>   return -EINVAL;
>   }
> 
> + if (vde->soc->supports_block_linear) {
> + switch (frame->modifier) {
> + case DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_ONE_GOB:
> + 

Re: [PATCH 10/14] staging: media: tegra-vde: Keep VDE in reset when unused

2018-08-18 Thread Dmitry Osipenko
On Monday, 13 August 2018 17:50:23 MSK Thierry Reding wrote:
> From: Thierry Reding 
> 
> There is no point in keeping the VDE module out of reset when it is not
> in use. Reset it on runtime suspend.
> 
> Signed-off-by: Thierry Reding 
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c
> b/drivers/staging/media/tegra-vde/tegra-vde.c index
> 3bc0bfcfe34e..4b3c6ab3c77e 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -1226,6 +1226,7 @@ static int tegra_vde_runtime_suspend(struct device
> *dev) }
> 
>   reset_control_assert(vde->rst_bsev);
> + reset_control_assert(vde->rst);
> 
>   usleep_range(2000, 4000);

There is also no point in resetting VDE while it is powered off, so why do we
do that?




Re: [PATCH 07/14] staging: media: tegra-vde: Add some clarifying comments

2018-08-18 Thread Dmitry Osipenko
On Monday, 13 August 2018 17:50:20 MSK Thierry Reding wrote:
> From: Thierry Reding 
> 
> Add some comments specifying what tables are being set up in VRAM.
> 
> Signed-off-by: Thierry Reding 
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 19 +++
>  1 file changed, 19 insertions(+)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c
> b/drivers/staging/media/tegra-vde/tegra-vde.c index
> 0adc603fa437..41cf86dc5dbd 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -271,6 +271,7 @@ static void tegra_vde_setup_iram_tables(struct tegra_vde
> *vde, unsigned int i, k;
>   size_t size;
> 
> + /* clear H256RefPicList */
>   size = num_ref_pics * 4 * 8;
>   memset(vde->iram, 0, size);

H256? Is it a typo?

> 
> @@ -453,6 +454,7 @@ static int tegra_vde_setup_hw_context(struct tegra_vde
> *vde, VDE_WR(0x, vde->bsev + 0x98);
>   VDE_WR(0x0060, vde->bsev + 0x9C);
> 
> + /* clear H264MB2SliceGroupMap, assuming no FMO */
>   memset(vde->iram + 1024, 0, macroblocks_nb / 2);
> 
>   tegra_setup_frameidx(vde, dpb_frames, ctx->dpb_frames_nb,
> @@ -480,6 +482,8 @@ static int tegra_vde_setup_hw_context(struct tegra_vde
> *vde, if (err)
>   return err;
> 
> + /* upload H264MB2SliceGroupMap */
> + /* XXX don't hardcode map size? */
>   value = (0x20 << 26) | (0 << 25) | ((4096 >> 2) & 0x1fff);
>   err = tegra_vde_push_to_bsev_icmdqueue(vde, value, false);
>   if (err)
> @@ -492,6 +496,7 @@ static int tegra_vde_setup_hw_context(struct tegra_vde
> *vde, if (err)
>   return err;
> 
> + /* clear H264MBInfo XXX don't hardcode size */
>   value = (0x21 << 26) | ((240 & 0x1fff) << 12) | (0x54c & 0xfff);
>   err = tegra_vde_push_to_bsev_icmdqueue(vde, 0x840F054C, false);
>   if (err)
> @@ -499,6 +504,16 @@ static int tegra_vde_setup_hw_context(struct tegra_vde
> *vde,
> 
>   size = num_ref_pics * 4 * 8;
> 
> + /* clear H264RefPicList */

#if 0

> + value = (0x21 << 26) | (((size >> 2) & 0x1fff) << 12) | 0xE34;
> +
> + err = tegra_vde_push_to_bsev_icmdqueue(vde, value, false);
> + if (err)
> + return err;

#endif

Is it supposed to do the same as "clear H256RefPicList -> memset(vde->iram, 0, 
size)" above?

> +
> + /* upload H264RefPicList */
>   value = (0x20 << 26) | (0x0 << 25) | ((size >> 2) & 0x1fff);
>   err = tegra_vde_push_to_bsev_icmdqueue(vde, value, false);
>   if (err)
> @@ -584,7 +599,11 @@ static int tegra_vde_setup_hw_context(struct tegra_vde
> *vde,
> 
>   tegra_vde_mbe_set_0xa_reg(vde, 0, 0x09FC);
>   tegra_vde_mbe_set_0xa_reg(vde, 2, 0x61DEAD00);
> +#if 0
> + tegra_vde_mbe_set_0xa_reg(vde, 4, dpb_frames[0].aux_addr); /* 0x62DEAD00
> */ +#else
>   tegra_vde_mbe_set_0xa_reg(vde, 4, 0x62DEAD00);
> +#endif

This doesn't really clarify much, let's drop this chunk for now.

>   tegra_vde_mbe_set_0xa_reg(vde, 6, 0x63DEAD00);
>   tegra_vde_mbe_set_0xa_reg(vde, 8, dpb_frames[0].aux_addr);






Re: [PATCH 09/14] staging: media: tegra-vde: Add IOMMU support

2018-08-18 Thread Dmitry Osipenko
On Monday, 13 August 2018 17:50:22 MSK Thierry Reding wrote:
> From: Thierry Reding 
> 
> Implement support for using an IOMMU to map physically discontiguous
> buffers into contiguous I/O virtual mappings that the VDE can use. This
> allows importing arbitrary DMA-BUFs for use by the VDE.
> 
> While at it, make sure that the device is detached from any DMA/IOMMU
> mapping that it might have automatically been attached to at boot. If
> using the IOMMU API explicitly, detaching from any existing mapping is
> required to avoid double mapping of buffers.
> 
> Signed-off-by: Thierry Reding 
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 171 +---
>  1 file changed, 153 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c
> b/drivers/staging/media/tegra-vde/tegra-vde.c index
> 2496a03fd158..3bc0bfcfe34e 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -13,7 +13,9 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -22,6 +24,10 @@
>  #include 
>  #include 
> 
> +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
> +#include 
> +#endif
> +
>  #include 
> 
>  #include 
> @@ -61,6 +67,11 @@ struct video_frame {
>   u32 frame_num;
>   u32 flags;
>   u64 modifier;
> +
> + struct iova *y_iova;
> + struct iova *cb_iova;
> + struct iova *cr_iova;
> + struct iova *aux_iova;
>  };
> 
>  struct tegra_vde_soc {
> @@ -93,6 +104,12 @@ struct tegra_vde {
>   struct clk *clk_bsev;
>   dma_addr_t iram_lists_addr;
>   u32 *iram;
> +
> + struct iommu_domain *domain;
> + struct iommu_group *group;
> + struct iova_domain iova;
> + unsigned long limit;
> + unsigned int shift;
>  };
> 
>  static void tegra_vde_set_bits(struct tegra_vde *vde,
> @@ -634,12 +651,22 @@ static void tegra_vde_decode_frame(struct tegra_vde
> *vde, VDE_WR(0x2000 | (macroblocks_nb - 1), vde->sxe + 0x00);
>  }
> 
> -static void tegra_vde_detach_and_put_dmabuf(struct dma_buf_attachment *a,
> +static void tegra_vde_detach_and_put_dmabuf(struct tegra_vde *vde,
> + struct dma_buf_attachment *a,
>   struct sg_table *sgt,
> + struct iova *iova,
>   enum dma_data_direction dma_dir)
>  {
>   struct dma_buf *dmabuf = a->dmabuf;
> 
> + if (vde->domain) {
> + unsigned long size = iova_size(iova) << vde->shift;

Let's make it "size = iova_align(&vde->iova, dmabuf->size)" for better
readability.

> + dma_addr_t addr = iova_dma_addr(>iova, iova);
> +
> + iommu_unmap(vde->domain, addr, size);
> + __free_iova(>iova, iova);
> + }
> +
>   dma_buf_unmap_attachment(a, sgt, dma_dir);
>   dma_buf_detach(dmabuf, a);
>   dma_buf_put(dmabuf);
> @@ -651,14 +678,16 @@ static int tegra_vde_attach_dmabuf(struct tegra_vde
> *vde, size_t min_size,
>  size_t align_size,
>  struct dma_buf_attachment **a,
> -dma_addr_t *addr,
> +dma_addr_t *addrp,
>  struct sg_table **s,
> -size_t *size,
> +struct iova **iovap,
> +size_t *sizep,
>  enum dma_data_direction dma_dir)
>  {
>   struct dma_buf_attachment *attachment;
>   struct dma_buf *dmabuf;
>   struct sg_table *sgt;
> + size_t size;
>   int err;
> 
>   dmabuf = dma_buf_get(fd);
> @@ -695,18 +724,47 @@ static int tegra_vde_attach_dmabuf(struct tegra_vde
> *vde, goto err_detach;
>   }
> 
> - if (sgt->nents != 1) {
> + if (sgt->nents > 1 && !vde->domain) {
>   dev_err(vde->dev, "Sparse DMA region is unsupported\n");
>   err = -EINVAL;
>   goto err_unmap;
>   }
> 
> - *addr = sg_dma_address(sgt->sgl) + offset;
> + if (vde->domain) {
> + int prot = IOMMU_READ | IOMMU_WRITE;
> + struct iova *iova;
> + dma_addr_t addr;
> +
> + size = (dmabuf->size - offset) >> vde->shift;

The offset shall not be subtracted, and the dmabuf size shall be rounded up to
the IOVA granule. Also, let's not carry the shift within the vde structure, as
it isn't really worth it.


shift = iova_shift(&vde->iova);
size = iova_align(&vde->iova, dmabuf->size) >> shift;

> +
> + iova = alloc_iova(>iova, size, vde->limit - 1, true);
> + if (!iova) {
> + err = -ENOMEM;
> + goto err_unmap;
> + }
> +
> + addr = iova_dma_addr(>iova, iova);
> +
> + size = iommu_map_sg(vde->domain, addr, sgt->sgl, sgt->nents,
> +   

Re: [PATCH 08/14] staging: media: tegra-vde: Track struct device *

2018-08-18 Thread Dmitry Osipenko
l_decode_h264(struct
> tegra_vde *vde, while (i--) {
>   dma_dir = (i == 0) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> 
> - tegra_vde_release_frame_dmabufs(_frames[i], dma_dir,
> + tegra_vde_release_frame_dmabufs(vde, _frames[i], dma_dir,
>   ctx.baseline_profile);
>   }
> 
> @@ -1089,10 +1094,12 @@ static int tegra_vde_ioctl_decode_h264(struct
> tegra_vde *vde,
> 
>  release_bitstream_dmabuf:
>   if (secure_attachment)
> - tegra_vde_detach_and_put_dmabuf(secure_attachment, secure_sgt,
> + tegra_vde_detach_and_put_dmabuf(vde, secure_attachment,
> + secure_sgt,
>   DMA_TO_DEVICE);
> 
> - tegra_vde_detach_and_put_dmabuf(bitstream_data_dmabuf_attachment,
> + tegra_vde_detach_and_put_dmabuf(vde,
> + bitstream_data_dmabuf_attachment,
>   bitstream_sgt, DMA_TO_DEVICE);
> 
>   return ret;
> @@ -1190,6 +1197,8 @@ static int tegra_vde_probe(struct platform_device
> *pdev) if (!vde)
>   return -ENOMEM;
> 
> + vde->dev = >dev;
> +
>   platform_set_drvdata(pdev, vde);
> 
>   vde->soc = of_device_get_match_data(>dev);

Reviewed-by: Dmitry Osipenko 




Re: [PATCH 02/14] staging: media: tegra-vde: Support reference picture marking

2018-08-18 Thread Dmitry Osipenko
On Monday, 13 August 2018 17:50:15 MSK Thierry Reding wrote:
> From: Thierry Reding 
> 
> Tegra114 and Tegra124 support reference picture marking, which will
> cause BSEV to write picture marking data to SDRAM. Make sure there is
> a valid destination address for that data to avoid error messages from
> the memory controller.
> 
> Signed-off-by: Thierry Reding 
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 54 -
>  drivers/staging/media/tegra-vde/uapi.h  |  3 ++
>  2 files changed, 55 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c
> b/drivers/staging/media/tegra-vde/tegra-vde.c index
> 9d8f833744db..3027b11b11ae 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -60,7 +60,12 @@ struct video_frame {
>   u32 flags;
>  };
> 
> +struct tegra_vde_soc {
> + bool supports_ref_pic_marking;
> +};
> +
>  struct tegra_vde {
> + const struct tegra_vde_soc *soc;
>   void __iomem *sxe;
>   void __iomem *bsev;
>   void __iomem *mbe;
> @@ -330,6 +335,7 @@ static int tegra_vde_setup_hw_context(struct tegra_vde
> *vde, struct video_frame *dpb_frames,
> dma_addr_t bitstream_data_addr,
> size_t bitstream_data_size,
> +   dma_addr_t secure_addr,
> unsigned int macroblocks_nb)
>  {
>   struct device *dev = vde->miscdev.parent;
> @@ -454,6 +460,9 @@ static int tegra_vde_setup_hw_context(struct tegra_vde
> *vde,
> 
>   VDE_WR(bitstream_data_addr, vde->sxe + 0x6C);
> 
> + if (vde->soc->supports_ref_pic_marking)
> + VDE_WR(secure_addr, vde->sxe + 0x7c);
> +
>   value = 0x1005;
>   value |= ctx->pic_width_in_mbs << 11;
>   value |= ctx->pic_height_in_mbs << 3;
> @@ -772,12 +781,15 @@ static int tegra_vde_ioctl_decode_h264(struct
> tegra_vde *vde, struct tegra_vde_h264_frame __user *frames_user;
>   struct video_frame *dpb_frames;
>   struct dma_buf_attachment *bitstream_data_dmabuf_attachment;
> - struct sg_table *bitstream_sgt;
> + struct dma_buf_attachment *secure_attachment = NULL;
> + struct sg_table *bitstream_sgt, *secure_sgt;
>   enum dma_data_direction dma_dir;
>   dma_addr_t bitstream_data_addr;
> + dma_addr_t secure_addr;
>   dma_addr_t bsev_ptr;
>   size_t lsize, csize;
>   size_t bitstream_data_size;
> + size_t secure_size;

secure_size is unused, you could omit it and replace with NULL below.

>   unsigned int macroblocks_nb;
>   unsigned int read_bytes;
>   unsigned int cstride;
> @@ -803,6 +815,18 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde
> *vde, if (ret)
>   return ret;
> 
> + if (vde->soc->supports_ref_pic_marking) {
> + ret = tegra_vde_attach_dmabuf(dev, ctx.secure_fd,
> +   ctx.secure_offset, 0, SZ_256,

Minimum buffer size? Since it's coming from userspace, you must specify it to
validate the buffer size correctly.

> +   &secure_attachment,
> +   &secure_addr,
> +   &secure_sgt,
> +   &secure_size,
> +   DMA_TO_DEVICE);
> + if (ret)
> + goto release_bitstream_dmabuf;
> + }
> +
>   dpb_frames = kcalloc(ctx.dpb_frames_nb, sizeof(*dpb_frames),
>GFP_KERNEL);
>   if (!dpb_frames) {
> @@ -876,6 +900,7 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde
> *vde, ret = tegra_vde_setup_hw_context(vde, &ctx, dpb_frames,
>bitstream_data_addr,
>bitstream_data_size,
> +  secure_addr,
>macroblocks_nb);
>   if (ret)
>   goto put_runtime_pm;
> @@ -929,6 +954,10 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde
> *vde, kfree(dpb_frames);
> 
>  release_bitstream_dmabuf:

release_secure_dmabuf:

> + if (secure_attachment)
> + tegra_vde_detach_and_put_dmabuf(secure_attachment, secure_sgt,
> + DMA_TO_DEVICE);
> +
>   tegra_vde_detach_and_put_dmabuf(bitstream_data_dmabuf_attachment,
>   bitstream_sgt, DMA_TO_DEVICE);
> 
> @@ -1029,6 +1058,8 @@ static int tegra_vde_probe(struct platform_device
> *pdev)
> 
>   platform_set_drvdata(pdev, vde);
> 
> + vde->soc = of_device_get_match_data(&pdev->dev);
> +
>   regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sxe");
>   if (!regs)
>   return -ENODEV;
> @@ -1258,8 +1289,27 @@ static const struct dev_pm_ops tegra_vde_pm_ops = {
>   

Re: [PATCH 03/14] staging: media: tegra-vde: Prepare for interlacing support

2018-08-18 Thread Dmitry Osipenko
On Monday, 13 August 2018 17:50:16 MSK Thierry Reding wrote:
> From: Thierry Reding 
> 
> The number of frames doubles when decoding interlaced content and the
> structures describing the frames double in size. Take that into account
> to prepare for interlacing support.
> 
> Signed-off-by: Thierry Reding 
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 73 -
>  1 file changed, 58 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c
> b/drivers/staging/media/tegra-vde/tegra-vde.c index
> 3027b11b11ae..1a40f6dff7c8 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -61,7 +61,9 @@ struct video_frame {
>  };
> 
>  struct tegra_vde_soc {
> + unsigned int num_ref_pics;
>   bool supports_ref_pic_marking;
> + bool supports_interlacing;
>  };
> 
>  struct tegra_vde {
> @@ -205,8 +207,12 @@ static void tegra_vde_setup_frameid(struct tegra_vde
> *vde, u32 cr_addr = frame ? frame->cr_addr : 0x6CDEAD00;
>   u32 value1 = frame ? ((mbs_width << 16) | mbs_height) : 0;
>   u32 value2 = frame ? ((((mbs_width + 1) >> 1) << 6) | 1) : 0;
> + u32 value = y_addr >> 8;

Let's name it value0 for consistency.

> 
> - VDE_WR(y_addr  >> 8, vde->frameid + 0x000 + frameid * 4);
> + if (vde->soc->supports_interlacing)
> + value |= BIT(31);
> +
> + VDE_WR(value,vde->frameid + 0x000 + frameid * 4);
>   VDE_WR(cb_addr >> 8, vde->frameid + 0x100 + frameid * 4);
>   VDE_WR(cr_addr >> 8, vde->frameid + 0x180 + frameid * 4);
>   VDE_WR(value1,   vde->frameid + 0x080 + frameid * 4);
> @@ -229,20 +235,23 @@ static void tegra_setup_frameidx(struct tegra_vde
> *vde, }
> 
>  static void tegra_vde_setup_iram_entry(struct tegra_vde *vde,
> +unsigned int num_ref_pics,
>  unsigned int table,
>  unsigned int row,
>  u32 value1, u32 value2)
>  {
> + unsigned int entries = num_ref_pics * 2;
>   u32 *iram_tables = vde->iram;
> 
>   dev_dbg(vde->miscdev.parent, "IRAM table %u: row %u: 0x%08X 0x%08X\n",
>   table, row, value1, value2);
> 
> - iram_tables[0x20 * table + row * 2] = value1;
> - iram_tables[0x20 * table + row * 2 + 1] = value2;
> + iram_tables[entries * table + row * 2] = value1;
> + iram_tables[entries * table + row * 2 + 1] = value2;
>  }
> 
>  static void tegra_vde_setup_iram_tables(struct tegra_vde *vde,
> + unsigned int num_ref_pics,
>   struct video_frame *dpb_frames,
>   unsigned int ref_frames_nb,
>   unsigned int with_earlier_poc_nb)
> @@ -251,13 +260,17 @@ static void tegra_vde_setup_iram_tables(struct
> tegra_vde *vde, u32 value, aux_addr;
>   int with_later_poc_nb;
>   unsigned int i, k;
> + size_t size;
> +
> + size = num_ref_pics * 4 * 8;
> + memset(vde->iram, 0, size);

Is this memset() really needed, or is it just because you're feeling
uncomfortable that something is kept uninitialized?

> 
>   dev_dbg(vde->miscdev.parent, "DPB: Frame 0: frame_num = %d\n",
>   dpb_frames[0].frame_num);
> 
>   dev_dbg(vde->miscdev.parent, "REF L0:\n");
> 
> - for (i = 0; i < 16; i++) {
> + for (i = 0; i < num_ref_pics; i++) {
>   if (i < ref_frames_nb) {
>   frame = &dpb_frames[i + 1];
> 
> @@ -277,10 +290,14 @@ static void tegra_vde_setup_iram_tables(struct
> tegra_vde *vde, value = 0;
>   }
> 
> - tegra_vde_setup_iram_entry(vde, 0, i, value, aux_addr);
> - tegra_vde_setup_iram_entry(vde, 1, i, value, aux_addr);
> - tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
> - tegra_vde_setup_iram_entry(vde, 3, i, value, aux_addr);
> + tegra_vde_setup_iram_entry(vde, num_ref_pics, 0, i, value,
> +aux_addr);
> + tegra_vde_setup_iram_entry(vde, num_ref_pics, 1, i, value,
> +aux_addr);
> + tegra_vde_setup_iram_entry(vde, num_ref_pics, 2, i, value,
> +aux_addr);
> + tegra_vde_setup_iram_entry(vde, num_ref_pics, 3, i, value,
> +aux_addr);
>   }
> 
>   if (!(dpb_frames[0].flags & FLAG_B_FRAME))
> @@ -309,7 +326,8 @@ static void tegra_vde_setup_iram_tables(struct tegra_vde
> *vde, "\tFrame %d: frame_num = %d\n",
>   k + 1, frame->frame_num);
> 
> - tegra_vde_setup_iram_entry(vde, 2, i, value, aux_addr);
> + tegra_vde_setup_iram_entry(vde, num_ref_pics, 2, i, value,
> +aux_addr);
>   }
> 
>   for 

Re: [PATCH 06/14] staging: media: tegra-vde: Print out invalid FD

2018-08-18 Thread Dmitry Osipenko
On Monday, 13 August 2018 17:50:19 MSK Thierry Reding wrote:
> From: Thierry Reding 
> 
> Include the invalid file descriptor when reporting an error message to
> help diagnosing why importing the buffer failed.
> 
> Signed-off-by: Thierry Reding 
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c
> b/drivers/staging/media/tegra-vde/tegra-vde.c index
> 0ce30c7ccb75..0adc603fa437 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -643,7 +643,7 @@ static int tegra_vde_attach_dmabuf(struct device *dev,
> 
>   dmabuf = dma_buf_get(fd);
>   if (IS_ERR(dmabuf)) {
> - dev_err(dev, "Invalid dmabuf FD\n");
> + dev_err(dev, "Invalid dmabuf FD: %d\n", fd);
>   return PTR_ERR(dmabuf);
>   }

Reviewed-by: Dmitry Osipenko 




Re: [PATCH 14/14] ARM: tegra: Enable SMMU for VDE on Tegra124

2018-08-18 Thread Dmitry Osipenko
On Monday, 13 August 2018 17:50:27 MSK Thierry Reding wrote:
> From: Thierry Reding 
> 
> The video decode engine can use the SMMU to use buffers that are not
> physically contiguous in memory. This allows better memory usage for
> video decoding, since fragmentation may cause contiguous allocations
> to fail.
> 
> Signed-off-by: Thierry Reding 
> ---
>  arch/arm/boot/dts/tegra124.dtsi | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/arch/arm/boot/dts/tegra124.dtsi
> b/arch/arm/boot/dts/tegra124.dtsi index 8fdca4723205..0713e0ed5fef 100644
> --- a/arch/arm/boot/dts/tegra124.dtsi
> +++ b/arch/arm/boot/dts/tegra124.dtsi
> @@ -321,6 +321,8 @@
>   resets = <&tegra_car 61>,
><&tegra_car 63>;
>   reset-names = "vde", "bsev";
> +
> +     iommus = <&mc TEGRA_SWGROUP_VDE>;
>   };
> 
>   apbdma: dma@6002 {

Reviewed-by: Dmitry Osipenko 

The same should be applied to Tegra30.




Re: [PATCH 05/14] staging: media: tegra-vde: Properly mark invalid entries

2018-08-18 Thread Dmitry Osipenko
On Monday, 13 August 2018 17:50:18 MSK Thierry Reding wrote:
> From: Thierry Reding 
> 
> Entries in the reference picture list are marked as invalid by setting
> the frame ID to 0x3f.
> 
> Signed-off-by: Thierry Reding 
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c
> b/drivers/staging/media/tegra-vde/tegra-vde.c index
> 275884e745df..0ce30c7ccb75 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -296,7 +296,7 @@ static void tegra_vde_setup_iram_tables(struct tegra_vde
> *vde, (frame->flags & FLAG_B_FRAME));
>   } else {
>   aux_addr = 0x6ADEAD00;
> - value = 0;
> + value = 0x3f;
>   }
> 
>   tegra_vde_setup_iram_entry(vde, num_ref_pics, 0, i, value,

Reviewed-by: Dmitry Osipenko 
Tested-by: Dmitry Osipenko 




Re: [PATCH 11/14] ARM: tegra: Enable VDE on Tegra124

2018-08-18 Thread Dmitry Osipenko
On Monday, 13 August 2018 17:50:24 MSK Thierry Reding wrote:
> From: Thierry Reding 
> 
> Signed-off-by: Thierry Reding 
> ---
>  arch/arm/boot/dts/tegra124.dtsi | 40 +
>  1 file changed, 40 insertions(+)
> 
> diff --git a/arch/arm/boot/dts/tegra124.dtsi
> b/arch/arm/boot/dts/tegra124.dtsi index b113e47b2b2a..8fdca4723205 100644
> --- a/arch/arm/boot/dts/tegra124.dtsi
> +++ b/arch/arm/boot/dts/tegra124.dtsi
> @@ -83,6 +83,19 @@
>   };
>   };
> 
> + iram@4000 {
> + compatible = "mmio-sram";
> + reg = <0x0 0x4000 0x0 0x4>;
> + #address-cells = <1>;
> + #size-cells = <1>;
> + ranges = <0 0x0 0x4000 0x4>;
> +
> + vde_pool: pool@400 {
> + reg = <0x400 0x3fc00>;
> + pool;
> + };
> + };
> +
>   host1x@5000 {
>   compatible = "nvidia,tegra124-host1x", "simple-bus";
>   reg = <0x0 0x5000 0x0 0x00034000>;
> @@ -283,6 +296,33 @@
>   */
>   };
> 
> + vde@6003 {
> + compatible = "nvidia,tegra124-vde", "nvidia,tegra30-vde",
> +  "nvidia,tegra20-vde";
> + reg = <0x0 0x6003 0x0 0x1000   /* Syntax Engine */
> +0x0 0x60031000 0x0 0x1000   /* Video Bitstream Engine */
> +0x0 0x60032000 0x0 0x0100   /* Macroblock Engine */
> +0x0 0x60032200 0x0 0x0100   /* Post-processing Engine */
> +0x0 0x60032400 0x0 0x0100   /* Motion Compensation 
> Engine */
> +0x0 0x60032600 0x0 0x0100   /* Transform Engine */
> +0x0 0x60032800 0x0 0x0100   /* Pixel prediction block */
> +0x0 0x60032a00 0x0 0x0100   /* Video DMA */
> +0x0 0x60033800 0x0 0x0400>; /* Video frame controls */
> + reg-names = "sxe", "bsev", "mbe", "ppe", "mce",
> + "tfe", "ppb", "vdma", "frameid";
> + iram = <&vde_pool>; /* IRAM region */
> + interrupts = , /* Sync token 
> +  , /* BSE-V 
> interrupt */
> +  ; /* SXE interrupt 
> */
> + interrupt-names = "sync-token", "bsev", "sxe";
> + clocks = <&tegra_car TEGRA124_CLK_VDE>,
> +  <&tegra_car TEGRA124_CLK_BSEV>;
> + clock-names = "vde", "bsev";
> + resets = <&tegra_car 61>,
> +  <&tegra_car 63>;
> + reset-names = "vde", "bsev";

Memory client reset missed?

> + };
> +
>   apbdma: dma@6002 {
>   compatible = "nvidia,tegra124-apbdma", "nvidia,tegra148-apbdma";
>   reg = <0x0 0x6002 0x0 0x1400>;






Re: [PATCH 01/14] staging: media: tegra-vde: Support BSEV clock and reset

2018-08-14 Thread Dmitry Osipenko
On Tuesday, 14 August 2018 18:05:51 MSK Dmitry Osipenko wrote:
> On Tuesday, 14 August 2018 17:21:24 MSK Thierry Reding wrote:
> > On Mon, Aug 13, 2018 at 06:09:46PM +0300, Dmitry Osipenko wrote:
> > > On Monday, 13 August 2018 17:50:14 MSK Thierry Reding wrote:
> > > > From: Thierry Reding 
> > > > 
> > > > The BSEV clock has a separate gate bit and can not be assumed to be
> > > > always enabled. Add explicit handling for the BSEV clock and reset.
> > > > 
> > > > This fixes an issue on Tegra124 where the BSEV clock is not enabled
> > > > by default and therefore accessing the BSEV registers will hang the
> > > > CPU if the BSEV clock is not enabled and the reset not deasserted.
> > > > 
> > > > Signed-off-by: Thierry Reding 
> > > > ---
> > > 
> > > Are you sure that BSEV clock is really needed for T20/30? I've tried
> > > already to disable the clock explicitly and everything kept working,
> > > though I'll try again.
> > 
> > I think you're right that these aren't strictly required for VDE to work
> > on Tegra20 and Tegra30. However, the BSEV clock and reset do exist on
> > those platforms, so I didn't see a reason why they shouldn't be handled
> > uniformly across all generations.
> 
> It's a bit messy to have an unused clock being enabled.
> 
> I guess BSEV clock on T20/30 only enables the AES engine. If the decryption
> engine is integrated with the video decoder, then the clock and reset should
> be requested by the driver, but BSEV should be kept disabled if it's not
> used.

Though even if encryption is not directly integrated with the video decoding, 
then it still makes sense to define the clock and reset in DT without using 
them by the VDE driver since the HW registers space is shared. If somebody 
would like to implement the AES driver, it could be made as a sub-device of 
VDE.




Re: [PATCH 01/14] staging: media: tegra-vde: Support BSEV clock and reset

2018-08-14 Thread Dmitry Osipenko
On Tuesday, 14 August 2018 17:21:24 MSK Thierry Reding wrote:
> On Mon, Aug 13, 2018 at 06:09:46PM +0300, Dmitry Osipenko wrote:
> > On Monday, 13 August 2018 17:50:14 MSK Thierry Reding wrote:
> > > From: Thierry Reding 
> > > 
> > > The BSEV clock has a separate gate bit and can not be assumed to be
> > > always enabled. Add explicit handling for the BSEV clock and reset.
> > > 
> > > This fixes an issue on Tegra124 where the BSEV clock is not enabled
> > > by default and therefore accessing the BSEV registers will hang the
> > > CPU if the BSEV clock is not enabled and the reset not deasserted.
> > > 
> > > Signed-off-by: Thierry Reding 
> > > ---
> > 
> > Are you sure that BSEV clock is really needed for T20/30? I've tried
> > already to disable the clock explicitly and everything kept working,
> > though I'll try again.
> 
> I think you're right that these aren't strictly required for VDE to work
> on Tegra20 and Tegra30. However, the BSEV clock and reset do exist on
> those platforms, so I didn't see a reason why they shouldn't be handled
> uniformly across all generations.

It's a bit messy to have an unused clock being enabled.

I guess BSEV clock on T20/30 only enables the AES engine. If the decryption 
engine is integrated with the video decoder, then the clock and reset should 
be requested by the driver, but BSEV should be kept disabled if it's not used.

If BSEV clock isn't powering anything related to VDE on T20/30, then let's
make BSEV clock and reset control optional. For the clock we could check
whether err == -ENOENT and continue; later we may switch to the upcoming
devm_clk_get_optional() [0]. For the reset there is
devm_reset_control_get_optional().

Please try to verify by all means that we can omit BSEV on T20/30. If you are 
not sure, then let's make them optional as we can always make them required 
later.

P.S. I'll test and review all the patches during the next days. 

[0] https://lkml.org/lkml/2018/7/18/460




Re: [PATCH 01/14] staging: media: tegra-vde: Support BSEV clock and reset

2018-08-13 Thread Dmitry Osipenko
On Monday, 13 August 2018 17:50:14 MSK Thierry Reding wrote:
> From: Thierry Reding 
> 
> The BSEV clock has a separate gate bit and can not be assumed to be
> always enabled. Add explicit handling for the BSEV clock and reset.
> 
> This fixes an issue on Tegra124 where the BSEV clock is not enabled
> by default and therefore accessing the BSEV registers will hang the
> CPU if the BSEV clock is not enabled and the reset not deasserted.
> 
> Signed-off-by: Thierry Reding 
> ---

Are you sure that BSEV clock is really needed for T20/30? I've tried already 
to disable the clock explicitly and everything kept working, though I'll try 
again.

The device-tree changes should be reflected in the binding documentation.





Re: [RFC PATCH v2 1/2] drm: Add generic colorkey properties

2018-05-28 Thread Dmitry Osipenko
On 29.05.2018 02:48, Dmitry Osipenko wrote:
> inversion=true" if mask has form of 0x11000111, though this could be not

For clarity: I meant s/0x11000111/0xFF000FFF/.


Re: [RFC PATCH v2 1/2] drm: Add generic colorkey properties

2018-05-28 Thread Dmitry Osipenko
On 28.05.2018 16:15, Ville Syrjälä wrote:
> On Sat, May 26, 2018 at 06:56:22PM +0300, Dmitry Osipenko wrote:
>> Color keying is the action of replacing pixels matching a given color
>> (or range of colors) with transparent pixels in an overlay when
>> performing blitting. Depending on the hardware capabilities, the
>> matching pixel can either become fully transparent or gain adjustment
>> of the pixels component values.
>>
>> Color keying is found in a large number of devices whose capabilities
>> often differ, but they still have enough common features in range to
>> standardize color key properties. This commit adds nine generic DRM plane
>> properties related to the color keying to cover various HW capabilities.
>>
>> This patch is based on the initial work done by Laurent Pinchart; most of
>> the credit for this patch goes to him.
>>
>> Signed-off-by: Dmitry Osipenko 
>> ---
>>  drivers/gpu/drm/drm_atomic.c |  36 ++
>>  drivers/gpu/drm/drm_blend.c  | 229 +++
>>  include/drm/drm_blend.h  |   3 +
>>  include/drm/drm_plane.h  |  77 
>>  4 files changed, 345 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
>> index 895741e9cd7d..5b808cb68654 100644
>> --- a/drivers/gpu/drm/drm_atomic.c
>> +++ b/drivers/gpu/drm/drm_atomic.c
>> @@ -799,6 +799,24 @@ static int drm_atomic_plane_set_property(struct 
>> drm_plane *plane,
>>  state->rotation = val;
>>  } else if (property == plane->zpos_property) {
>>  state->zpos = val;
>> +} else if (property == plane->colorkey.mode_property) {
>> +state->colorkey.mode = val;
>> +} else if (property == plane->colorkey.min_property) {
>> +state->colorkey.min = val;
>> +} else if (property == plane->colorkey.max_property) {
>> +state->colorkey.max = val;
>> +} else if (property == plane->colorkey.format_property) {
>> +state->colorkey.format = val;
>> +} else if (property == plane->colorkey.mask_property) {
>> +state->colorkey.mask = val;
>> +} else if (property == plane->colorkey.inverted_match_property) {
>> +state->colorkey.inverted_match = val;
>> +} else if (property == plane->colorkey.replacement_mask_property) {
>> +state->colorkey.replacement_mask = val;
>> +} else if (property == plane->colorkey.replacement_value_property) {
>> +state->colorkey.replacement_value = val;
>> +} else if (property == plane->colorkey.replacement_format_property) {
>> +state->colorkey.replacement_format = val;
>>  } else if (property == plane->color_encoding_property) {
>>  state->color_encoding = val;
>>  } else if (property == plane->color_range_property) {
>> @@ -864,6 +882,24 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
>>  *val = state->rotation;
>>  } else if (property == plane->zpos_property) {
>>  *val = state->zpos;
>> +} else if (property == plane->colorkey.mode_property) {
>> +*val = state->colorkey.mode;
>> +} else if (property == plane->colorkey.min_property) {
>> +*val = state->colorkey.min;
>> +} else if (property == plane->colorkey.max_property) {
>> +*val = state->colorkey.max;
>> +} else if (property == plane->colorkey.format_property) {
>> +*val = state->colorkey.format;
>> +} else if (property == plane->colorkey.mask_property) {
>> +*val = state->colorkey.mask;
>> +} else if (property == plane->colorkey.inverted_match_property) {
>> +*val = state->colorkey.inverted_match;
>> +} else if (property == plane->colorkey.replacement_mask_property) {
>> +*val = state->colorkey.replacement_mask;
>> +} else if (property == plane->colorkey.replacement_value_property) {
>> +*val = state->colorkey.replacement_value;
>> +} else if (property == plane->colorkey.replacement_format_property) {
>> +*val = state->colorkey.replacement_format;
>>  } else if (property == plane->color_encoding_property) {
>>  *val = state->color_encoding;
>>  } else if (property == plane->color_range_property) {
>> diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
>> index a16a74d7e15e..05e

Re: [RFC PATCH v2 1/2] drm: Add generic colorkey properties

2018-05-26 Thread Dmitry Osipenko
On 26.05.2018 19:18, Laurent Pinchart wrote:
> On Saturday, 26 May 2018 19:16:54 EEST Laurent Pinchart wrote:
>> Hi Dimitri,
> 
> And sorry for the spelling mistake :-/

That's also a kinda correct spelling. No worries ;)


Re: [RFC PATCH v2 1/2] drm: Add generic colorkey properties

2018-05-26 Thread Dmitry Osipenko
On 26.05.2018 19:16, Laurent Pinchart wrote:
> Hi Dimitri,
> 
> Thank you for the patch.
> 
> I'll review this in details, but as this patch is based on the "[PATCH/RFC 
> 1/4] drm: Add colorkey properties" patch I've submitted, please retain the 
> authorship, both in the Signed-off-by line, and in the patch author in git.
Okay. /I think/ I've seen requests to do it the other way around for picked-up
and re-worked patches, though I don't mind at all keeping your authorship. I'll
change the authorship in the next iteration. Waiting for your review comments,
thanks.


[RFC PATCH v2 1/2] drm: Add generic colorkey properties

2018-05-26 Thread Dmitry Osipenko
Color keying is the action of replacing pixels matching a given color
(or range of colors) with transparent pixels in an overlay when
performing blitting. Depending on the hardware capabilities, the
matching pixel can either become fully transparent or gain adjustment
of the pixels component values.

Color keying is found in a large number of devices whose capabilities
often differ, but they still have enough common features in range to
standardize color key properties. This commit adds nine generic DRM plane
properties related to the color keying to cover various HW capabilities.

This patch is based on the initial work done by Laurent Pinchart; most of
the credit for this patch goes to him.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 drivers/gpu/drm/drm_atomic.c |  36 ++
 drivers/gpu/drm/drm_blend.c  | 229 +++
 include/drm/drm_blend.h  |   3 +
 include/drm/drm_plane.h  |  77 
 4 files changed, 345 insertions(+)

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 895741e9cd7d..5b808cb68654 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -799,6 +799,24 @@ static int drm_atomic_plane_set_property(struct drm_plane 
*plane,
state->rotation = val;
} else if (property == plane->zpos_property) {
state->zpos = val;
+   } else if (property == plane->colorkey.mode_property) {
+   state->colorkey.mode = val;
+   } else if (property == plane->colorkey.min_property) {
+   state->colorkey.min = val;
+   } else if (property == plane->colorkey.max_property) {
+   state->colorkey.max = val;
+   } else if (property == plane->colorkey.format_property) {
+   state->colorkey.format = val;
+   } else if (property == plane->colorkey.mask_property) {
+   state->colorkey.mask = val;
+   } else if (property == plane->colorkey.inverted_match_property) {
+   state->colorkey.inverted_match = val;
+   } else if (property == plane->colorkey.replacement_mask_property) {
+   state->colorkey.replacement_mask = val;
+   } else if (property == plane->colorkey.replacement_value_property) {
+   state->colorkey.replacement_value = val;
+   } else if (property == plane->colorkey.replacement_format_property) {
+   state->colorkey.replacement_format = val;
} else if (property == plane->color_encoding_property) {
state->color_encoding = val;
} else if (property == plane->color_range_property) {
@@ -864,6 +882,24 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
*val = state->rotation;
} else if (property == plane->zpos_property) {
*val = state->zpos;
+   } else if (property == plane->colorkey.mode_property) {
+   *val = state->colorkey.mode;
+   } else if (property == plane->colorkey.min_property) {
+   *val = state->colorkey.min;
+   } else if (property == plane->colorkey.max_property) {
+   *val = state->colorkey.max;
+   } else if (property == plane->colorkey.format_property) {
+   *val = state->colorkey.format;
+   } else if (property == plane->colorkey.mask_property) {
+   *val = state->colorkey.mask;
+   } else if (property == plane->colorkey.inverted_match_property) {
+   *val = state->colorkey.inverted_match;
+   } else if (property == plane->colorkey.replacement_mask_property) {
+   *val = state->colorkey.replacement_mask;
+   } else if (property == plane->colorkey.replacement_value_property) {
+   *val = state->colorkey.replacement_value;
+   } else if (property == plane->colorkey.replacement_format_property) {
+   *val = state->colorkey.replacement_format;
} else if (property == plane->color_encoding_property) {
*val = state->color_encoding;
} else if (property == plane->color_range_property) {
diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c
index a16a74d7e15e..05e5632ce375 100644
--- a/drivers/gpu/drm/drm_blend.c
+++ b/drivers/gpu/drm/drm_blend.c
@@ -107,6 +107,11 @@
  * planes. Without this property the primary plane is always below the 
cursor
  * plane, and ordering between all other planes is undefined.
  *
+ * colorkey:
+ * Color keying is set up with drm_plane_create_colorkey_properties().
+ * It adds support for replacing a range of colors with a transparent
+ * color in the plane.
+ *
  * Note that all the property extensions described here apply either to the
  * plane or the CRTC (e.g. for the background color, which currently is not
  * exposed and assumed to be black).
@@ -448,3 

[RFC PATCH v2 0/2] Implement standard color keying properties for DRM planes

2018-05-26 Thread Dmitry Osipenko
Hello, DRM maintainers!

Laurent Pinchart kindly agreed to allow me to pick up his work on
the generic colorkey DRM plane property [0]. I've reworked the original
patch a tad, hopefully making it flexible enough to cover various HW
capabilities.

Changes I've made:

- Some code clean up and reshuffle.

- Took into account some of Ville Syrjälä's review comments on [0].

- The number of common DRM colorkey properties grows from 4 to 9.
  New properties:
- colorkey.mask
- colorkey.format
- colorkey.inverted-match
- colorkey.replacement-mask
- colorkey.replacement-format

  Renamed properties:
- colorkey.value -> colorkey.replacement-value

- colorkey.mode userspace-property ENUM's got a bit more explicit
  names, like "src" -> "src-match-src-replace".

- No driver-specific modes / properties allowed, all unsupported
  features are simply rejected by the drivers.

This patchset includes initial colorkey property implementation for the
older NVIDIA Tegra's.

Please review, thanks.

[0] https://lists.freedesktop.org/archives/dri-devel/2017-December/160510.html

Dmitry Osipenko (2):
  drm: Add generic colorkey properties
  drm/tegra: plane: Implement generic colorkey property for older
Tegra's

 drivers/gpu/drm/drm_atomic.c  |  36 ++
 drivers/gpu/drm/drm_blend.c   | 229 ++
 drivers/gpu/drm/tegra/dc.c|  31 +
 drivers/gpu/drm/tegra/dc.h|   7 ++
 drivers/gpu/drm/tegra/plane.c | 147 ++
 drivers/gpu/drm/tegra/plane.h |   1 +
 include/drm/drm_blend.h   |   3 +
 include/drm/drm_plane.h   |  77 
 8 files changed, 531 insertions(+)

-- 
2.17.0



[RFC PATCH v2 2/2] drm/tegra: plane: Implement generic colorkey property for older Tegra's

2018-05-26 Thread Dmitry Osipenko
Color keying allows drawing on top of overlapping planes, for example
on top of a video plane. Older Tegras have a limited color keying
capability, such that blending features are reduced when color keying is
enabled. In particular, dependent weighting isn't possible, meaning that
the cursor plane can't be displayed properly. In most cases it is more
useful to display content on top of the video overlay, so sacrificing the
mouse cursor in the area where three planes intersect with a colorkey
mismatch is a reasonable tradeoff.

This patch implements the generic DRM colorkey property. As a start,
minimal color keying support is implemented; it is enough to let
userspace such as the Opentegra Xorg driver support color keying via
the XVideo extension.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 drivers/gpu/drm/tegra/dc.c|  31 +++
 drivers/gpu/drm/tegra/dc.h|   7 ++
 drivers/gpu/drm/tegra/plane.c | 147 ++
 drivers/gpu/drm/tegra/plane.h |   1 +
 4 files changed, 186 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 31e12a9dfcb8..a5add64e40e2 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -172,6 +172,11 @@ static void tegra_plane_setup_blending_legacy(struct 
tegra_plane *plane)
 
state = to_tegra_plane_state(plane->base.state);
 
+   if (state->ckey_enabled) {
+   background[0] |= BLEND_COLOR_KEY_0;
+   background[2] |= BLEND_COLOR_KEY_0;
+   }
+
if (state->opaque) {
/*
 * Since custom fix-weight blending isn't utilized and weight
@@ -776,6 +781,11 @@ static struct drm_plane *tegra_primary_plane_create(struct 
drm_device *drm,
drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255);
 
+   if (dc->soc->has_legacy_blending)
+   drm_plane_create_colorkey_properties(&plane->base,
+   BIT(DRM_PLANE_COLORKEY_MODE_DISABLED) |
+   BIT(DRM_PLANE_COLORKEY_MODE_DST));
+
return &plane->base;
 }
 
@@ -1053,6 +1063,11 @@ static struct drm_plane 
*tegra_dc_overlay_plane_create(struct drm_device *drm,
drm_plane_helper_add(&plane->base, &tegra_plane_helper_funcs);
drm_plane_create_zpos_property(&plane->base, plane->index, 0, 255);
 
+   if (dc->soc->has_legacy_blending)
+   drm_plane_create_colorkey_properties(&plane->base,
+   BIT(DRM_PLANE_COLORKEY_MODE_DISABLED) |
+   BIT(DRM_PLANE_COLORKEY_MODE_DST));
+
return &plane->base;
 }
 
@@ -1153,6 +1168,7 @@ tegra_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
 {
struct tegra_dc_state *state = to_dc_state(crtc->state);
struct tegra_dc_state *copy;
+   unsigned int i;
 
copy = kmalloc(sizeof(*copy), GFP_KERNEL);
if (!copy)
@@ -1164,6 +1180,9 @@ tegra_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
copy->div = state->div;
copy->planes = state->planes;
 
+   for (i = 0; i < 2; i++)
+   copy->ckey[i] = state->ckey[i];
+
return &copy->base;
 }
 
@@ -1893,6 +1912,18 @@ static void tegra_crtc_atomic_flush(struct drm_crtc 
*crtc,
struct tegra_dc *dc = to_tegra_dc(crtc);
u32 value;
 
+   if (dc->soc->has_legacy_blending) {
+   tegra_dc_writel(dc,
+   state->ckey[0].lower, DC_DISP_COLOR_KEY0_LOWER);
+   tegra_dc_writel(dc,
+   state->ckey[0].upper, DC_DISP_COLOR_KEY0_UPPER);
+
+   tegra_dc_writel(dc,
+   state->ckey[1].lower, DC_DISP_COLOR_KEY1_LOWER);
+   tegra_dc_writel(dc,
+   state->ckey[1].upper, DC_DISP_COLOR_KEY1_UPPER);
+   }
+
value = state->planes << 8 | GENERAL_UPDATE;
tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index e96f582ca692..8209cb7d598a 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -18,6 +18,11 @@
 
 struct tegra_output;
 
+struct tegra_dc_color_key_state {
+   u32 lower;
+   u32 upper;
+};
+
 struct tegra_dc_state {
struct drm_crtc_state base;
 
@@ -26,6 +31,8 @@ struct tegra_dc_state {
unsigned int div;
 
u32 planes;
+
+   struct tegra_dc_color_key_state ckey[2];
 };
 
 static inline struct tegra_dc_state *to_dc_state(struct drm_crtc_state *state)
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index 0406c2ef432c..ba08b66d2499 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -57,6 +57,7 @@ tegra

[PATCH v1] media: dt: bindings: tegra-vde: Document new optional Memory Client reset property

2018-05-26 Thread Dmitry Osipenko
The binding of the Memory Controller has recently been extended to expose
the Memory Client reset controls, hence it is now a reset controller.
The Tegra video-decoder device is among the Memory Controller reset users,
so document the new optional VDE HW reset property.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 .../devicetree/bindings/media/nvidia,tegra-vde.txt| 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt 
b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
index 470237ed6fe5..7302e949e662 100644
--- a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
+++ b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
@@ -27,9 +27,15 @@ Required properties:
   - sxe
 - clocks : Must include the following entries:
   - vde
-- resets : Must include the following entries:
+- resets : Must contain an entry for each entry in reset-names.
+- reset-names : Should include the following entries:
   - vde
 
+Optional properties:
+- resets : Must contain an entry for each entry in reset-names.
+- reset-names : Must include the following entries:
+  - mc
+
 Example:
 
 video-codec@6001a000 {
@@ -51,5 +57,6 @@ video-codec@6001a000 {
 ; /* SXE interrupt */
interrupt-names = "sync-token", "bsev", "sxe";
clocks = <&tegra_car TEGRA20_CLK_VDE>;
-   resets = <&tegra_car 61>;
+   reset-names = "vde", "mc";
+   resets = <&tegra_car 61>, <&mc TEGRA20_MC_RESET_VDE>;
 };
-- 
2.17.0



[PATCH v2] media: staging: tegra-vde: Reset memory client

2018-05-26 Thread Dmitry Osipenko
DMA requests must be blocked before resetting VDE HW, otherwise it is
possible to get a memory corruption or a machine hang. Use the reset
control provided by the Memory Controller to block DMA before resetting
the VDE HW.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---

Changelog:

v2:
- Reset HW even if Memory Client resetting fails.

 drivers/staging/media/tegra-vde/tegra-vde.c | 35 +++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
b/drivers/staging/media/tegra-vde/tegra-vde.c
index 90177a59b97c..6f06061a40d9 100644
--- a/drivers/staging/media/tegra-vde/tegra-vde.c
+++ b/drivers/staging/media/tegra-vde/tegra-vde.c
@@ -73,6 +73,7 @@ struct tegra_vde {
struct mutex lock;
struct miscdevice miscdev;
struct reset_control *rst;
+   struct reset_control *rst_mc;
struct gen_pool *iram_pool;
struct completion decode_completion;
struct clk *clk;
@@ -850,9 +851,23 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde 
*vde,
 * We rely on the VDE registers reset value, otherwise VDE
 * causes bus lockup.
 */
+   ret = reset_control_assert(vde->rst_mc);
+   if (ret) {
+   dev_err(dev, "DEC start: Failed to assert MC reset: %d\n",
+   ret);
+   goto put_runtime_pm;
+   }
+
ret = reset_control_reset(vde->rst);
if (ret) {
-   dev_err(dev, "Failed to reset HW: %d\n", ret);
+   dev_err(dev, "DEC start: Failed to reset HW: %d\n", ret);
+   goto put_runtime_pm;
+   }
+
+   ret = reset_control_deassert(vde->rst_mc);
+   if (ret) {
+   dev_err(dev, "DEC start: Failed to deassert MC reset: %d\n",
+   ret);
goto put_runtime_pm;
}
 
@@ -880,9 +895,18 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde 
*vde,
ret = timeout;
}
 
+   /*
+* At first reset memory client to avoid resetting VDE HW in the
+* middle of DMA which could result into memory corruption or hang
+* the whole system.
+*/
+   err = reset_control_assert(vde->rst_mc);
+   if (err)
+   dev_err(dev, "DEC end: Failed to assert MC reset: %d\n", err);
+
err = reset_control_assert(vde->rst);
if (err)
-   dev_err(dev, "Failed to assert HW reset: %d\n", err);
+   dev_err(dev, "DEC end: Failed to assert HW reset: %d\n", err);
 
 put_runtime_pm:
pm_runtime_mark_last_busy(dev);
@@ -1074,6 +1098,13 @@ static int tegra_vde_probe(struct platform_device *pdev)
return err;
}
 
+   vde->rst_mc = devm_reset_control_get_optional(dev, "mc");
+   if (IS_ERR(vde->rst_mc)) {
+   err = PTR_ERR(vde->rst_mc);
+   dev_err(dev, "Could not get MC reset %d\n", err);
+   return err;
+   }
+
irq = platform_get_irq_byname(pdev, "sync-token");
if (irq < 0)
return irq;
-- 
2.17.0



Re: [PATCH v1] media: staging: tegra-vde: Reset memory client

2018-05-26 Thread Dmitry Osipenko
On 20.05.2018 16:48, Dmitry Osipenko wrote:
> DMA requests must be blocked before resetting VDE HW, otherwise it is
> possible to get a memory corruption or a machine hang. Use the reset
> control provided by the Memory Controller to block DMA before resetting
> the VDE HW.
> 
> Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
> ---
>  drivers/staging/media/tegra-vde/tegra-vde.c | 42 +++--
>  1 file changed, 38 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
> b/drivers/staging/media/tegra-vde/tegra-vde.c
> index 90177a59b97c..6dd3bf4481be 100644
> --- a/drivers/staging/media/tegra-vde/tegra-vde.c
> +++ b/drivers/staging/media/tegra-vde/tegra-vde.c
> @@ -73,6 +73,7 @@ struct tegra_vde {
>   struct mutex lock;
>   struct miscdevice miscdev;
>   struct reset_control *rst;
> + struct reset_control *rst_mc;
>   struct gen_pool *iram_pool;
>   struct completion decode_completion;
>   struct clk *clk;
> @@ -850,9 +851,23 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde 
> *vde,
>* We rely on the VDE registers reset value, otherwise VDE
>* causes bus lockup.
>*/
> + ret = reset_control_assert(vde->rst_mc);
> + if (ret) {
> + dev_err(dev, "DEC start: Failed to assert MC reset: %d\n",
> + ret);
> + goto put_runtime_pm;
> + }
> +
>   ret = reset_control_reset(vde->rst);
>   if (ret) {
> - dev_err(dev, "Failed to reset HW: %d\n", ret);
> + dev_err(dev, "DEC start: Failed to reset HW: %d\n", ret);
> + goto put_runtime_pm;
> + }
> +
> + ret = reset_control_deassert(vde->rst_mc);
> + if (ret) {
> + dev_err(dev, "DEC start: Failed to deassert MC reset: %d\n",
> + ret);
>   goto put_runtime_pm;
>   }
>  
> @@ -880,9 +895,21 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde 
> *vde,
>   ret = timeout;
>   }
>  
> - err = reset_control_assert(vde->rst);
> - if (err)
> - dev_err(dev, "Failed to assert HW reset: %d\n", err);
> + /*
> +  * At first reset memory client to avoid resetting VDE HW in the
> +  * middle of DMA which could result into memory corruption or hang
> +  * the whole system.
> +  */
> + err = reset_control_assert(vde->rst_mc);
> + if (!err) {

It occurred to me that there is no need to skip the HW reset if MC resetting
fails. I'll make V2 to change that.

> + err = reset_control_assert(vde->rst);
> + if (err)
> + dev_err(dev,
> + "DEC end: Failed to assert HW reset: %d\n",
> + err);
> + } else {
> + dev_err(dev, "DEC end: Failed to assert MC reset: %d\n", err);
> + }
>  
>  put_runtime_pm:
>   pm_runtime_mark_last_busy(dev);
> @@ -1074,6 +1101,13 @@ static int tegra_vde_probe(struct platform_device 
> *pdev)
>   return err;
>   }
>  
> + vde->rst_mc = devm_reset_control_get_optional(dev, "mc");
> + if (IS_ERR(vde->rst_mc)) {
> + err = PTR_ERR(vde->rst_mc);
> + dev_err(dev, "Could not get MC reset %d\n", err);
> + return err;
> + }
> +
>   irq = platform_get_irq_byname(pdev, "sync-token");
>   if (irq < 0)
>   return irq;
> 



[PATCH v1] media: staging: tegra-vde: Reset memory client

2018-05-20 Thread Dmitry Osipenko
DMA requests must be blocked before resetting VDE HW, otherwise it is
possible to get a memory corruption or a machine hang. Use the reset
control provided by the Memory Controller to block DMA before resetting
the VDE HW.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 drivers/staging/media/tegra-vde/tegra-vde.c | 42 +++--
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
b/drivers/staging/media/tegra-vde/tegra-vde.c
index 90177a59b97c..6dd3bf4481be 100644
--- a/drivers/staging/media/tegra-vde/tegra-vde.c
+++ b/drivers/staging/media/tegra-vde/tegra-vde.c
@@ -73,6 +73,7 @@ struct tegra_vde {
struct mutex lock;
struct miscdevice miscdev;
struct reset_control *rst;
+   struct reset_control *rst_mc;
struct gen_pool *iram_pool;
struct completion decode_completion;
struct clk *clk;
@@ -850,9 +851,23 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde 
*vde,
 * We rely on the VDE registers reset value, otherwise VDE
 * causes bus lockup.
 */
+   ret = reset_control_assert(vde->rst_mc);
+   if (ret) {
+   dev_err(dev, "DEC start: Failed to assert MC reset: %d\n",
+   ret);
+   goto put_runtime_pm;
+   }
+
ret = reset_control_reset(vde->rst);
if (ret) {
-   dev_err(dev, "Failed to reset HW: %d\n", ret);
+   dev_err(dev, "DEC start: Failed to reset HW: %d\n", ret);
+   goto put_runtime_pm;
+   }
+
+   ret = reset_control_deassert(vde->rst_mc);
+   if (ret) {
+   dev_err(dev, "DEC start: Failed to deassert MC reset: %d\n",
+   ret);
goto put_runtime_pm;
}
 
@@ -880,9 +895,21 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde 
*vde,
ret = timeout;
}
 
-   err = reset_control_assert(vde->rst);
-   if (err)
-   dev_err(dev, "Failed to assert HW reset: %d\n", err);
+   /*
+* At first reset memory client to avoid resetting VDE HW in the
+* middle of DMA which could result into memory corruption or hang
+* the whole system.
+*/
+   err = reset_control_assert(vde->rst_mc);
+   if (!err) {
+   err = reset_control_assert(vde->rst);
+   if (err)
+   dev_err(dev,
+   "DEC end: Failed to assert HW reset: %d\n",
+   err);
+   } else {
+   dev_err(dev, "DEC end: Failed to assert MC reset: %d\n", err);
+   }
 
 put_runtime_pm:
pm_runtime_mark_last_busy(dev);
@@ -1074,6 +1101,13 @@ static int tegra_vde_probe(struct platform_device *pdev)
return err;
}
 
+   vde->rst_mc = devm_reset_control_get_optional(dev, "mc");
+   if (IS_ERR(vde->rst_mc)) {
+   err = PTR_ERR(vde->rst_mc);
+   dev_err(dev, "Could not get MC reset %d\n", err);
+   return err;
+   }
+
irq = platform_get_irq_byname(pdev, "sync-token");
if (irq < 0)
return irq;
-- 
2.17.0



[PATCH v1 2/2] ARM: dts: tegra30: Add Memory Client reset to VDE

2018-05-20 Thread Dmitry Osipenko
Hook up Memory Client reset of the Video Decoder to the decoder's DT node.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 arch/arm/boot/dts/tegra30.dtsi | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi
index 09087b9c5e26..3300ff976053 100644
--- a/arch/arm/boot/dts/tegra30.dtsi
+++ b/arch/arm/boot/dts/tegra30.dtsi
@@ -404,7 +404,8 @@
 ; /* SXE interrupt 
*/
interrupt-names = "sync-token", "bsev", "sxe";
clocks = <&tegra_car TEGRA30_CLK_VDE>;
-   resets = <&tegra_car 61>;
+   reset-names = "vde", "mc";
+   resets = <&tegra_car 61>, <&mc TEGRA30_MC_RESET_VDE>;
};
 
apbmisc@7800 {
@@ -712,6 +713,7 @@
interrupts = ;
 
#iommu-cells = <1>;
+   #reset-cells = <1>;
};
 
fuse@7000f800 {
-- 
2.17.0



[PATCH v1 1/2] ARM: dts: tegra20: Add Memory Client reset to VDE

2018-05-20 Thread Dmitry Osipenko
Hook up Memory Client reset of the Video Decoder to the decoder's DT node.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 arch/arm/boot/dts/tegra20.dtsi | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index 983dd5c14794..f9495f12e731 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -282,7 +283,8 @@
 ; /* SXE interrupt 
*/
interrupt-names = "sync-token", "bsev", "sxe";
clocks = <&tegra_car TEGRA20_CLK_VDE>;
-   resets = <&tegra_car 61>;
+   reset-names = "vde", "mc";
+   resets = <&tegra_car 61>, <&mc TEGRA20_MC_RESET_VDE>;
};
 
apbmisc@7800 {
@@ -593,11 +595,12 @@
clock-names = "pclk", "clk32k_in";
};
 
-   memory-controller@7000f000 {
+   mc: memory-controller@7000f000 {
compatible = "nvidia,tegra20-mc";
reg = <0x7000f000 0x024
   0x7000f03c 0x3c4>;
interrupts = ;
+   #reset-cells = <1>;
};
 
iommu@7000f024 {
-- 
2.17.0



Re: [PATCH/RFC 1/4] drm: Add colorkey properties

2018-04-25 Thread Dmitry Osipenko
On 17.12.2017 03:17, Laurent Pinchart wrote:
> Color keying is the action of replacing pixels matching a given color
> (or range of colors) with transparent pixels in an overlay when
> performing blitting. Depending on the hardware capabilities, the
> matching pixel can either become fully transparent, or gain a
> programmable alpha value.
> 
> Color keying is found in a large number of devices whose capabilities
> often differ, but they still have enough common features in range to
> standardize color key properties. This commit adds four properties
> related to color keying named colorkey.min, colorkey.max, colorkey.alpha
> and colorkey.mode. Additional properties can be defined by drivers to
> expose device-specific features.
> 
> Signed-off-by: Laurent Pinchart <laurent.pinchart+rene...@ideasonboard.com>

Reviewed-by: Dmitry Osipenko <dig...@gmail.com>
Tested-by: Dmitry Osipenko <dig...@gmail.com>

Note that this patch needs to be rebased now.


Re: [PATCH/RFC 0/4] Implement standard color keying properties

2018-04-25 Thread Dmitry Osipenko
Hello Laurent,

On 17.12.2017 03:17, Laurent Pinchart wrote:
> Hello,
> 
> This patch series is an attempt at implementing standard properties for color
> keying support in the KMS API.

I was looking at implementing colorkey support for NVIDIA Tegra (older Tegras
in particular) and Daniel Vetter suggested that colorkey should be implemented
as a generic plane property, instead of the custom one that I had in the patch
[0]. I've applied your RFC patch and replaced the custom property with the
generic one; it works well.

It is very likely that all HW is capable of making a pixel completely
transparent when it matches a specified color, so that could be one of the
common modes. There could be limitations, though; for example, Tegras aren't
able to do blending-over of planes if colorkeying is enabled. The
'colorkey.mode' property allows a driver to expose both common properties and
custom ones. In the case of Tegra we could implement a common property such
that an atomic commit is rejected if the planes' blending mode isn't compatible
with the enabled colorkeying, and have a custom property for a custom HW-aware
application that won't be rejected (for example Opentegra's Xv extension). The
common modes could be derived later, once the generic property gets more usage
by a variety of drivers. For now I don't see any issues with your approach and
hope to see this series applied upstream so that it can be used; please
continue your effort.

[0] https://patchwork.kernel.org/patch/10342849/


[PATCH v1 1/5] media: staging: tegra-vde: Align bitstream size to 16K

2018-03-17 Thread Dmitry Osipenko
I've noticed that decoding sometimes fails if the size of the bitstream buffer
isn't aligned to 16K, probably because HW fetches data from memory at
a 16K granularity and if the last chunk of data isn't aligned, HW reads
garbage data beyond the dmabuf and tries to parse it.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 drivers/staging/media/tegra-vde/tegra-vde.c | 40 -
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
b/drivers/staging/media/tegra-vde/tegra-vde.c
index c47659e96089..c2ff2071b23c 100644
--- a/drivers/staging/media/tegra-vde/tegra-vde.c
+++ b/drivers/staging/media/tegra-vde/tegra-vde.c
@@ -440,7 +440,7 @@ static int tegra_vde_setup_hw_context(struct tegra_vde *vde,
VDE_WR(value, vde->sxe + 0x4C);
 
value = 0x0380;
-   value |= min_t(size_t, bitstream_data_size, SZ_1M);
+   value |= bitstream_data_size & GENMASK(19, 15);
 
VDE_WR(value, vde->sxe + 0x68);
 
@@ -522,7 +522,8 @@ static void tegra_vde_detach_and_put_dmabuf(struct 
dma_buf_attachment *a,
 static int tegra_vde_attach_dmabuf(struct device *dev,
   int fd,
   unsigned long offset,
-  unsigned int min_size,
+  size_t min_size,
+  size_t align_size,
   struct dma_buf_attachment **a,
   dma_addr_t *addr,
   struct sg_table **s,
@@ -540,9 +541,16 @@ static int tegra_vde_attach_dmabuf(struct device *dev,
return PTR_ERR(dmabuf);
}
 
+   if (dmabuf->size & (align_size - 1)) {
+   dev_err(dev, "Unaligned dmabuf 0x%zX, "
+"should be aligned to 0x%zX\n",
+   dmabuf->size, align_size);
+   return -EINVAL;
+   }
+
if ((u64)offset + min_size > dmabuf->size) {
dev_err(dev, "Too small dmabuf size %zu @0x%lX, "
-"should be at least %d\n",
+"should be at least %zu\n",
dmabuf->size, offset, min_size);
return -EINVAL;
}
@@ -596,7 +604,7 @@ static int tegra_vde_attach_dmabufs_to_frame(struct device 
*dev,
int err;
 
err = tegra_vde_attach_dmabuf(dev, src->y_fd,
- src->y_offset, csize * 4,
+ src->y_offset, csize * 4, SZ_256,
  >y_dmabuf_attachment,
  >y_addr,
  >y_sgt,
@@ -605,7 +613,7 @@ static int tegra_vde_attach_dmabufs_to_frame(struct device 
*dev,
return err;
 
err = tegra_vde_attach_dmabuf(dev, src->cb_fd,
- src->cb_offset, csize,
+ src->cb_offset, csize, SZ_256,
  >cb_dmabuf_attachment,
  >cb_addr,
  >cb_sgt,
@@ -614,7 +622,7 @@ static int tegra_vde_attach_dmabufs_to_frame(struct device 
*dev,
goto err_release_y;
 
err = tegra_vde_attach_dmabuf(dev, src->cr_fd,
- src->cr_offset, csize,
+ src->cr_offset, csize, SZ_256,
  >cr_dmabuf_attachment,
  >cr_addr,
  >cr_sgt,
@@ -628,7 +636,7 @@ static int tegra_vde_attach_dmabufs_to_frame(struct device 
*dev,
}
 
err = tegra_vde_attach_dmabuf(dev, src->aux_fd,
- src->aux_offset, csize,
+ src->aux_offset, csize, SZ_256,
  >aux_dmabuf_attachment,
  >aux_addr,
  >aux_sgt,
@@ -677,21 +685,6 @@ static int tegra_vde_validate_frame(struct device *dev,
return -EINVAL;
}
 
-   if (frame->y_offset & 0xFF) {
-   dev_err(dev, "Bad y_offset 0x%X\n", frame->y_offset);
-   return -EINVAL;
-   }
-
-   if (frame->cb_offset & 0xFF) {
-   dev_err(dev, "Bad cb_offset 0x%X\n", frame->cb_offset);
-   return -EINVAL;
-   }
-
-   if (frame->cr_offset & 0xFF) {
-   dev_err(dev, "Bad cr_offset 0x%X\n", frame->cr_offset);
-   return -EINVAL;
-   }
-
return 0;
 }
 
@@ -792,7 +785,8 @@ static i

[PATCH v1 3/5] media: staging: tegra-vde: Correct minimum size of U/V planes

2018-03-17 Thread Dmitry Osipenko
Stride of U/V planes must be aligned to 16 bytes (2 macroblocks). This
needs to be taken into account, otherwise it is possible to get a silent
memory corruption if dmabuf size is less than the size of decoded video
frame.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 drivers/staging/media/tegra-vde/tegra-vde.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
b/drivers/staging/media/tegra-vde/tegra-vde.c
index 14899c887d58..94b4db55cdb5 100644
--- a/drivers/staging/media/tegra-vde/tegra-vde.c
+++ b/drivers/staging/media/tegra-vde/tegra-vde.c
@@ -602,12 +602,12 @@ static int tegra_vde_attach_dmabufs_to_frame(struct 
device *dev,
 struct tegra_vde_h264_frame *src,
 enum dma_data_direction dma_dir,
 bool baseline_profile,
-size_t csize)
+size_t lsize, size_t csize)
 {
int err;
 
err = tegra_vde_attach_dmabuf(dev, src->y_fd,
- src->y_offset, csize * 4, SZ_256,
+ src->y_offset, lsize, SZ_256,
  >y_dmabuf_attachment,
  >y_addr,
  >y_sgt,
@@ -773,9 +773,11 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde 
*vde,
enum dma_data_direction dma_dir;
dma_addr_t bitstream_data_addr;
dma_addr_t bsev_ptr;
+   size_t lsize, csize;
size_t bitstream_data_size;
unsigned int macroblocks_nb;
unsigned int read_bytes;
+   unsigned int cstride;
unsigned int i;
long timeout;
int ret, err;
@@ -814,6 +816,10 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde 
*vde,
goto free_dpb_frames;
}
 
+   cstride = ALIGN(ctx.pic_width_in_mbs * 8, 16);
+   csize = cstride * ctx.pic_height_in_mbs * 8;
+   lsize = macroblocks_nb * 256;
+
for (i = 0; i < ctx.dpb_frames_nb; i++) {
ret = tegra_vde_validate_frame(dev, [i]);
if (ret)
@@ -827,7 +833,7 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde 
*vde,
ret = tegra_vde_attach_dmabufs_to_frame(dev, _frames[i],
[i], dma_dir,
ctx.baseline_profile,
-   macroblocks_nb * 64);
+   lsize, csize);
if (ret)
goto release_dpb_frames;
}
-- 
2.16.1



[PATCH v1 2/5] media: staging: tegra-vde: Silence some of checkpatch warnings

2018-03-17 Thread Dmitry Osipenko
Make all strings single line to make them grep'able and add a comment
to the memory barrier.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 drivers/staging/media/tegra-vde/tegra-vde.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
b/drivers/staging/media/tegra-vde/tegra-vde.c
index c2ff2071b23c..14899c887d58 100644
--- a/drivers/staging/media/tegra-vde/tegra-vde.c
+++ b/drivers/staging/media/tegra-vde/tegra-vde.c
@@ -368,6 +368,11 @@ static int tegra_vde_setup_hw_context(struct tegra_vde 
*vde,
tegra_vde_setup_iram_tables(vde, dpb_frames,
ctx->dpb_frames_nb - 1,
ctx->dpb_ref_frames_with_earlier_poc_nb);
+
+   /*
+* The IRAM mapping is write-combine, ensure that CPU buffers have
+* been flushed at this point.
+*/
wmb();
 
VDE_WR(0x, vde->bsev + 0x8C);
@@ -542,15 +547,13 @@ static int tegra_vde_attach_dmabuf(struct device *dev,
}
 
if (dmabuf->size & (align_size - 1)) {
-   dev_err(dev, "Unaligned dmabuf 0x%zX, "
-"should be aligned to 0x%zX\n",
+   dev_err(dev, "Unaligned dmabuf 0x%zX, should be aligned to 
0x%zX\n",
dmabuf->size, align_size);
return -EINVAL;
}
 
if ((u64)offset + min_size > dmabuf->size) {
-   dev_err(dev, "Too small dmabuf size %zu @0x%lX, "
-"should be at least %zu\n",
+   dev_err(dev, "Too small dmabuf size %zu @0x%lX, should be at 
least %zu\n",
dmabuf->size, offset, min_size);
return -EINVAL;
}
@@ -863,8 +866,7 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde 
*vde,
macroblocks_nb = readl_relaxed(vde->sxe + 0xC8) & 0x1FFF;
read_bytes = bsev_ptr ? bsev_ptr - bitstream_data_addr : 0;
 
-   dev_err(dev, "Decoding failed: "
-   "read 0x%X bytes, %u macroblocks parsed\n",
+   dev_err(dev, "Decoding failed: read 0x%X bytes, %u macroblocks 
parsed\n",
read_bytes, macroblocks_nb);
 
ret = -EIO;
-- 
2.16.1



[PATCH v1 4/5] media: staging: tegra-vde: Do not handle spurious interrupts

2018-03-17 Thread Dmitry Osipenko
Do not handle interrupts if we haven't asked for them; potentially that
could happen if HW wasn't programmed properly.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 drivers/staging/media/tegra-vde/tegra-vde.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
b/drivers/staging/media/tegra-vde/tegra-vde.c
index 94b4db55cdb5..9e542c6288f1 100644
--- a/drivers/staging/media/tegra-vde/tegra-vde.c
+++ b/drivers/staging/media/tegra-vde/tegra-vde.c
@@ -935,6 +935,9 @@ static irqreturn_t tegra_vde_isr(int irq, void *data)
 {
struct tegra_vde *vde = data;
 
+   if (completion_done(>decode_completion))
+   return IRQ_NONE;
+
tegra_vde_set_bits(vde, 0, vde->frameid + 0x208);
complete(>decode_completion);
 
-- 
2.16.1



[PATCH v1 5/5] media: staging: tegra-vde: Correct included header

2018-03-17 Thread Dmitry Osipenko
This is an Open Firmware driver, hence 'of_device.h' should be included
instead of 'platform_device.h'. Right now OF headers happen to be included
indirectly and this may break in the future, so let's correct the header.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 drivers/staging/media/tegra-vde/tegra-vde.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
b/drivers/staging/media/tegra-vde/tegra-vde.c
index 9e542c6288f1..90177a59b97c 100644
--- a/drivers/staging/media/tegra-vde/tegra-vde.c
+++ b/drivers/staging/media/tegra-vde/tegra-vde.c
@@ -16,7 +16,7 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
 #include 
 #include 
-- 
2.16.1



[PATCH v1 0/5] Tegra Video Decoder patches for 4.17

2018-03-17 Thread Dmitry Osipenko
Hello media maintainers,

I've been postponing sending out these patches for a while because I was
waiting for a review of the Tegra memory controller patches that would
allow resetting the VDE HW properly and was hoping that they would get into
4.17, but it's getting quite late now and it seems 4.18 is the best bet for the
proper VDE reset. So here is a small patchset that addresses a couple of
minor issues that I've spotted over time.

Dmitry Osipenko (5):
  media: staging: tegra-vde: Align bitstream size to 16K
  media: staging: tegra-vde: Silence some of checkpatch warnings
  media: staging: tegra-vde: Correct minimum size of U/V planes
  media: staging: tegra-vde: Do not handle spurious interrupts
  media: staging: tegra-vde: Correct included header

 drivers/staging/media/tegra-vde/tegra-vde.c | 63 -
 1 file changed, 34 insertions(+), 29 deletions(-)

-- 
2.16.1



Re: [RFCv4,19/21] media: vim2m: add request support

2018-03-12 Thread Dmitry Osipenko
On 12.03.2018 15:32, Alexandre Courbot wrote:
> On Mon, Mar 12, 2018 at 5:15 PM, Tomasz Figa <tf...@chromium.org> wrote:
>> Hi Paul, Dmitry,
>>
>> On Mon, Mar 12, 2018 at 5:10 PM, Paul Kocialkowski
>> <paul.kocialkow...@bootlin.com> wrote:
>>> Hi,
>>>
>>> On Sun, 2018-03-11 at 22:42 +0300, Dmitry Osipenko wrote:
>>>> Hello,
>>>>
>>>> On 07.03.2018 19:37, Paul Kocialkowski wrote:
>>>>> Hi,
>>>>>
>>>>> First off, I'd like to take the occasion to say thank-you for your
>>>>> work.
>>>>> This is a major piece of plumbing that is required for me to add
>>>>> support
>>>>> for the Allwinner CedarX VPU hardware in upstream Linux. Other
>>>>> drivers,
>>>>> such as tegra-vde (that was recently merged in staging) are also
>>>>> badly
>>>>> in need of this API.
>>>>
>>>> Certainly it would be good to have a common UAPI. Yet I haven't got my
>>>> hands on
>>>> trying to implement the V4L interface for the tegra-vde driver, but
>>>> I've taken a
>>>> look at Cedrus driver and for now I've one question:
>>>>
>>>> Would it be possible (or maybe already is) to have a single IOCTL that
>>>> takes input/output buffers with codec parameters, processes the
>>>> request(s) and returns to userspace when everything is done? Having 5
>>>> context switches for a single frame decode (like Cedrus VAAPI driver
>>>> does) looks like a bit of overhead.
>>>
>>> The V4L2 interface exposes ioctls for differents actions and I don't
>>> think there's a combined ioctl for this. The request API was introduced
>>> precisely because we need to have consistency between the various ioctls
>>> needed for each frame. Maybe one single (atomic) ioctl would have worked
>>> too, but that's apparently not how the V4L2 API was designed.
>>>
>>> I don't think there is any particular overhead caused by having n ioctls
>>> instead of a single one. At least that would be very surprising IMHO.
>>
>> Well, there is small syscall overhead, which normally shouldn't be
>> very painful, although with all the speculative execution hardening,
>> can't be sure of anything anymore. :)
>>
>> Hans and Alex can correct me if I'm wrong, but I believe there is a
>> more atomic-like API being planned, which would only need one IOCTL to
>> do everything. However, that would be a more serious change to the
>> V4L2 interfaces, so should be decoupled from Request API itself.
> 
> Indeed, we discussed the possibility to setup and submit requests in
> one syscall, similarly (at least in spirit) to the DRM atomic API.
> 
> This has only been discussed though, and as a feature to consider
> *after* the request API is merged for codecs (as the more complex
> camera use-cases would benefit more from it). As Tomasz mentioned, the
> overhead of ioctls is somehow negligible compared to the workload of
> the encoding/decoding itself, although I suppose it can still add up.
> The main advantage I can see for this is a simpler and less
> error-prone setup of requests for user-space.

Indeed, an atomic API should be nicer from a userspace perspective and a bit
more optimal for at least some workloads. It would be good to have it.


Re: [RFCv4,19/21] media: vim2m: add request support

2018-03-12 Thread Dmitry Osipenko
On 12.03.2018 11:29, Tomasz Figa wrote:
> On Mon, Mar 12, 2018 at 5:25 PM, Paul Kocialkowski
> <paul.kocialkow...@bootlin.com> wrote:
>> Hi,
>>
>> On Mon, 2018-03-12 at 17:15 +0900, Tomasz Figa wrote:
>>> Hi Paul, Dmitry,
>>>
>>> On Mon, Mar 12, 2018 at 5:10 PM, Paul Kocialkowski
>>> <paul.kocialkow...@bootlin.com> wrote:
>>>> Hi,
>>>>
>>>> On Sun, 2018-03-11 at 22:42 +0300, Dmitry Osipenko wrote:
>>>>> Hello,
>>>>>
>>>>> On 07.03.2018 19:37, Paul Kocialkowski wrote:
>>>>>> Hi,
>>>>>>
>>>>>> First off, I'd like to take the occasion to say thank-you for
>>>>>> your
>>>>>> work.
>>>>>> This is a major piece of plumbing that is required for me to add
>>>>>> support
>>>>>> for the Allwinner CedarX VPU hardware in upstream Linux. Other
>>>>>> drivers,
>>>>>> such as tegra-vde (that was recently merged in staging) are also
>>>>>> badly
>>>>>> in need of this API.
>>>>>
>>>>> Certainly it would be good to have a common UAPI. Yet I haven't
>>>>> got my
>>>>> hands on
>>>>> trying to implement the V4L interface for the tegra-vde driver,
>>>>> but
>>>>> I've taken a
>>>>> look at Cedrus driver and for now I've one question:
>>>>>
>>>>> Would it be possible (or maybe already is) to have a single IOCTL
>>>>> that
>>>>> takes input/output buffers with codec parameters, processes the
>>>>> request(s) and returns to userspace when everything is done?
>>>>> Having 5
>>>>> context switches for a single frame decode (like Cedrus VAAPI
>>>>> driver
>>>>> does) looks like a bit of overhead.
>>>>
>>>> The V4L2 interface exposes ioctls for differents actions and I don't
>>>> think there's a combined ioctl for this. The request API was
>>>> introduced
>>>> precisely because we need to have consistency between the various
>>>> ioctls
>>>> needed for each frame. Maybe one single (atomic) ioctl would have
>>>> worked
>>>> too, but that's apparently not how the V4L2 API was designed.
>>>>
>>>> I don't think there is any particular overhead caused by having n
>>>> ioctls
>>>> instead of a single one. At least that would be very surprising
>>>> IMHO.
>>>
>>> Well, there is small syscall overhead, which normally shouldn't be
>>> very painful, although with all the speculative execution hardening,
>>> can't be sure of anything anymore. :)
>>
>> Oh, my mistake then, I had it in mind that it is not really something
>> noticeable. Hopefully, it won't be a limiting factor in our cases.
> 
> With typical frame rates achievable by hardware codecs, I doubt that
> it would be a limiting factor. We're using a similar API (a WiP
> version of pre-Request API prototype from long ago) in Chrome OS
> already without any performance issues.

Thank you very much for the answers!

The syscall overhead is negligible in comparison to the rest of decoding, though
I wanted to clarify whether there is a way to avoid it. An atomic API sounds
like something that would suit that well.


Re: [RFCv4,19/21] media: vim2m: add request support

2018-03-11 Thread Dmitry Osipenko
Hello,

On 07.03.2018 19:37, Paul Kocialkowski wrote:
> Hi,
> 
> First off, I'd like to take the occasion to say thank-you for your work.
> This is a major piece of plumbing that is required for me to add support
> for the Allwinner CedarX VPU hardware in upstream Linux. Other drivers,
> such as tegra-vde (that was recently merged in staging) are also badly
> in need of this API.

Certainly it would be good to have a common UAPI. I haven't yet got around to
implementing the V4L interface for the tegra-vde driver, but I've taken a
look at the Cedrus driver and for now I have one question:

Would it be possible (or maybe already is) to have a single IOCTL that takes
input/output buffers with codec parameters, processes the request(s) and returns
to userspace when everything is done? Having 5 context switches for a single
frame decode (like Cedrus VAAPI driver does) looks like a bit of overhead.

> I have a few comments based on my experience integrating this request
> API with the Cedrus VPU driver (and the associated libva backend), that
> also concern the vim2m driver.
> 
> On Tue, 2018-02-20 at 13:44 +0900, Alexandre Courbot wrote:
>> Set the necessary ops for supporting requests in vim2m.
>>
>> Signed-off-by: Alexandre Courbot 
>> ---
>>  drivers/media/platform/Kconfig |  1 +
>>  drivers/media/platform/vim2m.c | 75
>> ++
>>  2 files changed, 76 insertions(+)
>>
>> diff --git a/drivers/media/platform/Kconfig
>> b/drivers/media/platform/Kconfig
>> index 614fbef08ddc..09be0b5f9afe 100644
>> --- a/drivers/media/platform/Kconfig
>> +++ b/drivers/media/platform/Kconfig
> 
> [...]
> 
>> +static int vim2m_request_submit(struct media_request *req,
>> +struct media_request_entity_data
>> *_data)
>> +{
>> +struct v4l2_request_entity_data *data;
>> +
>> +data = to_v4l2_entity_data(_data);
> 
> We need to call v4l2_m2m_try_schedule here so that m2m scheduling can
> happen when only 2 buffers were queued and no other action was taken
> from userspace. In that scenario, m2m scheduling currently doesn't
> happen.
> 
> However, this requires access to the m2m context, which is not easy to
> get from req or _data. I'm not sure that some container_of magic would
> even do the trick here.
> 
>> +return vb2_request_submit(data);
> 
> vb2_request_submit does not lock the associated request mutex although
> it accesses the associated queued buffers list, which I believe this
> mutex is supposed to protect.
> 
> We could either wrap this call with media_request_lock(req) and
> media_request_unlock(req) or have the lock in the function itself, which
> would require passing it the req pointer.
> 
> The latter would probably be safer for future use of the function.
> 
>> +}
>> +
>> +static const struct media_request_entity_ops vim2m_request_entity_ops
>> = {
>> +.data_alloc = vim2m_entity_data_alloc,
>> +.data_free  = v4l2_request_entity_data_free,
>> +.submit = vim2m_request_submit,
>> +};
>> +
>>  /*
>>   * File operations
>>   */
>> @@ -900,6 +967,9 @@ static int vim2m_open(struct file *file)
>>  ctx->dev = dev;
>>  hdl = >hdl;
>>  v4l2_ctrl_handler_init(hdl, 4);
>> +v4l2_request_entity_init(>req_entity,
>> _request_entity_ops,
>> + >dev->vfd);
>> +ctx->fh.entity = >req_entity.base;
>>  v4l2_ctrl_new_std(hdl, _ctrl_ops, V4L2_CID_HFLIP, 0, 1,
>> 1, 0);
>>  v4l2_ctrl_new_std(hdl, _ctrl_ops, V4L2_CID_VFLIP, 0, 1,
>> 1, 0);
>>  v4l2_ctrl_new_custom(hdl, _ctrl_trans_time_msec, NULL);
>> @@ -999,6 +1069,9 @@ static int vim2m_probe(struct platform_device
>> *pdev)
>>  if (!dev)
>>  return -ENOMEM;
>>  
>> +v4l2_request_mgr_init(>req_mgr, >vfd,
>> +  _request_ops);
>> +
>>  spin_lock_init(>irqlock);
>>  
>>  ret = v4l2_device_register(>dev, >v4l2_dev);
>> @@ -1012,6 +1085,7 @@ static int vim2m_probe(struct platform_device
>> *pdev)
>>  vfd = >vfd;
>>  vfd->lock = >dev_mutex;
>>  vfd->v4l2_dev = >v4l2_dev;
>> +vfd->req_mgr = >req_mgr.base;
>>  
>>  ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
>>  if (ret) {
>> @@ -1054,6 +1128,7 @@ static int vim2m_remove(struct platform_device
>> *pdev)
>>  del_timer_sync(>timer);
>>  video_unregister_device(>vfd);
>>  v4l2_device_unregister(>v4l2_dev);
>> +v4l2_request_mgr_free(>req_mgr);
>>  
>>  return 0;
>>  }
> 


-- 
Dmitry


Re: [PATCH] media: staging: tegra-vde: select DMA_SHARED_BUFFER

2018-01-05 Thread Dmitry Osipenko
On 05.01.2018 12:43, Arnd Bergmann wrote:
> Without CONFIG_DMA_SHARED_BUFFER we run into a link error for the
> dma_buf_* APIs:
> 
> ERROR: "dma_buf_map_attachment" 
> [drivers/staging/media/tegra-vde/tegra-vde.ko] undefined!
> ERROR: "dma_buf_attach" [drivers/staging/media/tegra-vde/tegra-vde.ko] 
> undefined!
> ERROR: "dma_buf_get" [drivers/staging/media/tegra-vde/tegra-vde.ko] undefined!
> ERROR: "dma_buf_put" [drivers/staging/media/tegra-vde/tegra-vde.ko] undefined!
> ERROR: "dma_buf_detach" [drivers/staging/media/tegra-vde/tegra-vde.ko] 
> undefined!
> ERROR: "dma_buf_unmap_attachment" 
> [drivers/staging/media/tegra-vde/tegra-vde.ko] undefined!
> 
> Signed-off-by: Arnd Bergmann <a...@arndb.de>
> ---
>  drivers/staging/media/tegra-vde/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/staging/media/tegra-vde/Kconfig 
> b/drivers/staging/media/tegra-vde/Kconfig
> index ec3ebdaa..5c4914674468 100644
> --- a/drivers/staging/media/tegra-vde/Kconfig
> +++ b/drivers/staging/media/tegra-vde/Kconfig
> @@ -1,6 +1,7 @@
>  config TEGRA_VDE
>   tristate "NVIDIA Tegra Video Decoder Engine driver"
>   depends on ARCH_TEGRA || COMPILE_TEST
> + select DMA_SHARED_BUFFER
>   select SRAM
>   help
>   Say Y here to enable support for the NVIDIA Tegra video decoder
> 

Thanks!

Acked-by: Dmitry Osipenko <dig...@gmail.com>


Re: [PATCH v5 0/4] NVIDIA Tegra video decoder driver

2017-12-17 Thread Dmitry Osipenko
On 12.12.2017 03:26, Dmitry Osipenko wrote:
> VDE driver provides accelerated video decoding to NVIDIA Tegra SoC's,
> it is a result of reverse-engineering efforts. Driver has been tested on
> Toshiba AC100 and Acer A500, it should work on any Tegra20 device.
> 
> In userspace this driver is utilized by libvdpau-tegra [0] that implements
> VDPAU interface, so any video player that supports VDPAU can provide
> accelerated video decoding on Tegra20 on Linux.
> 
> [0] https://github.com/grate-driver/libvdpau-tegra

Thierry, driver has been approved by media maintainers and should appear in 4.16
(it is already in -next). Please schedule the DT patches for 4.16, thanks.


Re: [GIT PULL FOR v4.16] staging/media: add NVIDIA Tegra video decoder driver

2017-12-15 Thread Dmitry Osipenko
On 14.12.2017 14:06, Mauro Carvalho Chehab wrote:
> Em Tue, 12 Dec 2017 16:28:40 +0100
> Hans Verkuil <hverk...@xs4all.nl> escreveu:
> 
>> This adds a new NVIDIA Tegra video decoder driver. It is depending on the
>> request API work since it is a stateless codec, so for now park this in 
>> staging.
>>
>> The dts patches should go through nvidia's tree.
>>
>> Regards,
>>
>>  Hans
>>
>> The following changes since commit 330dada5957e3ca0c8811b14c45e3ac42c694651:
>>
>>   media: dvb_frontend: fix return error code (2017-12-12 07:50:14 -0500)
>>
>> are available in the Git repository at:
>>
>>   git://linuxtv.org/hverkuil/media_tree.git tegradec
>>
>> for you to fetch changes up to c3c530f45e48b33a2cc49cdeec246d255a5ca7db:
>>
>>   staging: media: Introduce NVIDIA Tegra video decoder driver (2017-12-12 
>> 16:06:06 +0100)
>>
>> 
>> Dmitry Osipenko (2):
>>   media: dt: bindings: Add binding for NVIDIA Tegra Video Decoder Engine
>>   staging: media: Introduce NVIDIA Tegra video decoder driver
> 
> Ok, clearly, there are some things that are not OK in the driver,
> otherwise it wouldn't be merged into staging. Yet, there are warnings
> there that should be considered before moving it out of staging:

Sure, I'm aware of the checkpatch warnings; some of them aren't legitimate, others
aren't very important and will be corrected later. The main reason for going
into staging is the lack of V4L2 interface support in the driver
(the necessary V4L API isn't there yet), see TODO. Certainly there are other things
to be done besides the V4L interface before de-staging; going into staging is a
very good option right now, thanks for allowing it!

[snip]


[PATCH v5 1/4] media: dt: bindings: Add binding for NVIDIA Tegra Video Decoder Engine

2017-12-11 Thread Dmitry Osipenko
Add binding documentation for the Video Decoder Engine which is found
on NVIDIA Tegra20/30/114/124/132 SoC's.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
Acked-by: Rob Herring <r...@kernel.org>
---
 .../devicetree/bindings/media/nvidia,tegra-vde.txt | 55 ++
 1 file changed, 55 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt

diff --git a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt 
b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
new file mode 100644
index ..470237ed6fe5
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
@@ -0,0 +1,55 @@
+NVIDIA Tegra Video Decoder Engine
+
+Required properties:
+- compatible : Must contain one of the following values:
+   - "nvidia,tegra20-vde"
+   - "nvidia,tegra30-vde"
+   - "nvidia,tegra114-vde"
+   - "nvidia,tegra124-vde"
+   - "nvidia,tegra132-vde"
+- reg : Must contain an entry for each entry in reg-names.
+- reg-names : Must include the following entries:
+  - sxe
+  - bsev
+  - mbe
+  - ppe
+  - mce
+  - tfe
+  - ppb
+  - vdma
+  - frameid
+- iram : Must contain phandle to the mmio-sram device node that represents
+ IRAM region used by VDE.
+- interrupts : Must contain an entry for each entry in interrupt-names.
+- interrupt-names : Must include the following entries:
+  - sync-token
+  - bsev
+  - sxe
+- clocks : Must include the following entries:
+  - vde
+- resets : Must include the following entries:
+  - vde
+
+Example:
+
+video-codec@6001a000 {
+   compatible = "nvidia,tegra20-vde";
+   reg = <0x6001a000 0x1000 /* Syntax Engine */
+  0x6001b000 0x1000 /* Video Bitstream Engine */
+  0x6001c000  0x100 /* Macroblock Engine */
+  0x6001c200  0x100 /* Post-processing Engine */
+  0x6001c400  0x100 /* Motion Compensation Engine */
+  0x6001c600  0x100 /* Transform Engine */
+  0x6001c800  0x100 /* Pixel prediction block */
+  0x6001ca00  0x100 /* Video DMA */
+  0x6001d800  0x300 /* Video frame controls */>;
+   reg-names = "sxe", "bsev", "mbe", "ppe", "mce",
+   "tfe", "ppb", "vdma", "frameid";
+   iram = <&vde_pool>; /* IRAM region */
+   interrupts = , /* Sync token interrupt 
*/
+, /* BSE-V interrupt */
+; /* SXE interrupt */
+   interrupt-names = "sync-token", "bsev", "sxe";
+   clocks = <_car TEGRA20_CLK_VDE>;
+   resets = <_car 61>;
+};
-- 
2.15.1



[PATCH v5 0/4] NVIDIA Tegra video decoder driver

2017-12-11 Thread Dmitry Osipenko
VDE driver provides accelerated video decoding to NVIDIA Tegra SoC's,
it is a result of reverse-engineering efforts. Driver has been tested on
Toshiba AC100 and Acer A500, it should work on any Tegra20 device.

In userspace this driver is utilized by libvdpau-tegra [0] that implements
VDPAU interface, so any video player that supports VDPAU can provide
accelerated video decoding on Tegra20 on Linux.

[0] https://github.com/grate-driver/libvdpau-tegra

Change log:
v5:
- Moved driver to staging/media as per Hans Verkuil's request
- Addressed review comments to v4 from Vladimir Zapolskiy and
  Dan Carpenter
- Updated 'TODO', reflecting that this driver requires upcoming
  support of stateless decoders by V4L2
- Dropped patch that enabled VDE driver in tegra_defconfig for now
  as I realized that Tegra's DRM staging config is disabled there
  and right now we are relying on it in libvdpau-tegra
- Added myself to MAINTAINERS in the "Introduce driver" patch as per
  Vladimir's suggestion

v4:
- Added mmio-sram "IRAM DT node" patch from Vladimir Zapolskiy to
  the series, I modified it to cover all Tegra's and not only Tegra20
- Utilized genalloc for the reservation of IRAM region as per
  Vladimir's suggestion, VDE driver now selects SRAM driver in Kconfig
- Added defconfig patch to the series
- Described VDE registers in DT per HW unit, excluding BSE-A / UCQ
  and holes between the units
- Extended DT compatibility property with Tegra30/114/124/132 in the
  binding doc.
- Removed BSE-A interrupt from the DT binding because it's very
  likely that Audio Bitstream Engine isn't integrated with VDE
- Removed UCQ interrupt from the DT binding because in TRM it is
  represented as a distinct HW block that probably should have
  its own driver
- Addressed v3 review comments: factored out DT binding addition
  into a standalone patch, moved binding to media/, removed
  clocks/resets-names

v3:
- Suppressed compilation warnings reported by 'kbuild test robot'

v2:
- Addressed v1 review comments from Stephen Warren and Dan Carpenter
- Implemented runtime PM
- Miscellaneous code cleanups
- Changed 'TODO'
- CC'd media maintainers for the review as per Greg K-H's request,
  v1 can be viewed at https://lkml.org/lkml/2017/9/25/606

Dmitry Osipenko (3):
  media: dt: bindings: Add binding for NVIDIA Tegra Video Decoder Engine
  staging: media: Introduce NVIDIA Tegra video decoder driver
  ARM: dts: tegra20: Add video decoder node

Vladimir Zapolskiy (1):
  ARM: dts: tegra20: Add device tree node to describe IRAM

 .../devicetree/bindings/media/nvidia,tegra-vde.txt |   55 +
 MAINTAINERS|9 +
 arch/arm/boot/dts/tegra20.dtsi |   35 +
 drivers/staging/media/Kconfig  |2 +
 drivers/staging/media/Makefile |1 +
 drivers/staging/media/tegra-vde/Kconfig|7 +
 drivers/staging/media/tegra-vde/Makefile   |1 +
 drivers/staging/media/tegra-vde/TODO   |4 +
 drivers/staging/media/tegra-vde/tegra-vde.c| 1213 
 drivers/staging/media/tegra-vde/uapi.h |   78 ++
 10 files changed, 1405 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
 create mode 100644 drivers/staging/media/tegra-vde/Kconfig
 create mode 100644 drivers/staging/media/tegra-vde/Makefile
 create mode 100644 drivers/staging/media/tegra-vde/TODO
 create mode 100644 drivers/staging/media/tegra-vde/tegra-vde.c
 create mode 100644 drivers/staging/media/tegra-vde/uapi.h

-- 
2.15.1



[PATCH v5 4/4] ARM: dts: tegra20: Add video decoder node

2017-12-11 Thread Dmitry Osipenko
Add Video Decoder Engine device node.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 arch/arm/boot/dts/tegra20.dtsi | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index 36909df653c3..864a95872b8d 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -16,6 +16,11 @@
#address-cells = <1>;
#size-cells = <1>;
ranges = <0 0x4000 0x4>;
+
+   vde_pool: vde {
+   reg = <0x400 0x3fc00>;
+   pool;
+   };
};
 
host1x@5000 {
@@ -258,6 +263,28 @@
*/
};
 
+   vde@6001a000 {
+   compatible = "nvidia,tegra20-vde";
+   reg = <0x6001a000 0x1000   /* Syntax Engine */
+  0x6001b000 0x1000   /* Video Bitstream Engine */
+  0x6001c000  0x100   /* Macroblock Engine */
+  0x6001c200  0x100   /* Post-processing Engine */
+  0x6001c400  0x100   /* Motion Compensation Engine */
+  0x6001c600  0x100   /* Transform Engine */
+  0x6001c800  0x100   /* Pixel prediction block */
+  0x6001ca00  0x100   /* Video DMA */
+  0x6001d800  0x300>; /* Video frame controls */
+   reg-names = "sxe", "bsev", "mbe", "ppe", "mce",
+   "tfe", "ppb", "vdma", "frameid";
+   iram = <&vde_pool>; /* IRAM region */
+   interrupts = , /* Sync token 
interrupt */
+, /* BSE-V 
interrupt */
+; /* SXE interrupt 
*/
+   interrupt-names = "sync-token", "bsev", "sxe";
+   clocks = <_car TEGRA20_CLK_VDE>;
+   resets = <_car 61>;
+   };
+
apbmisc@7800 {
compatible = "nvidia,tegra20-apbmisc";
reg = <0x7800 0x64   /* Chip revision */
-- 
2.15.1



[PATCH v5 3/4] ARM: dts: tegra20: Add device tree node to describe IRAM

2017-12-11 Thread Dmitry Osipenko
From: Vladimir Zapolskiy <v...@mleia.com>

All Tegra20 SoCs contain 256KiB IRAM, which is used to store
resume code and by a video decoder engine.

Signed-off-by: Vladimir Zapolskiy <v...@mleia.com>
Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 arch/arm/boot/dts/tegra20.dtsi | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index 914f59166a99..36909df653c3 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -10,6 +10,14 @@
compatible = "nvidia,tegra20";
interrupt-parent = <>;
 
+   iram@4000 {
+   compatible = "mmio-sram";
+   reg = <0x4000 0x4>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   ranges = <0 0x4000 0x4>;
+   };
+
host1x@5000 {
compatible = "nvidia,tegra20-host1x", "simple-bus";
reg = <0x5000 0x00024000>;
-- 
2.15.1



[PATCH v5 2/4] staging: media: Introduce NVIDIA Tegra video decoder driver

2017-12-11 Thread Dmitry Osipenko
NVIDIA Tegra20/30/114/124/132 SoC's have video decoder engine that
supports standard set of video formats like H.264 / MPEG-4 / WMV / VC1.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 MAINTAINERS |9 +
 drivers/staging/media/Kconfig   |2 +
 drivers/staging/media/Makefile  |1 +
 drivers/staging/media/tegra-vde/Kconfig |7 +
 drivers/staging/media/tegra-vde/Makefile|1 +
 drivers/staging/media/tegra-vde/TODO|4 +
 drivers/staging/media/tegra-vde/tegra-vde.c | 1213 +++
 drivers/staging/media/tegra-vde/uapi.h  |   78 ++
 8 files changed, 1315 insertions(+)
 create mode 100644 drivers/staging/media/tegra-vde/Kconfig
 create mode 100644 drivers/staging/media/tegra-vde/Makefile
 create mode 100644 drivers/staging/media/tegra-vde/TODO
 create mode 100644 drivers/staging/media/tegra-vde/tegra-vde.c
 create mode 100644 drivers/staging/media/tegra-vde/uapi.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 7d195739f892..7f7c24949a06 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8706,6 +8706,15 @@ T:   git git://linuxtv.org/media_tree.git
 S: Maintained
 F: drivers/media/dvb-frontends/stv6111*
 
+MEDIA DRIVERS FOR NVIDIA TEGRA - VDE
+M: Dmitry Osipenko <dig...@gmail.com>
+L: linux-media@vger.kernel.org
+L: linux-te...@vger.kernel.org
+T: git git://linuxtv.org/media_tree.git
+S: Maintained
+F: Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
+F: drivers/staging/media/tegra-vde/
+
 MEDIA INPUT INFRASTRUCTURE (V4L/DVB)
 M: Mauro Carvalho Chehab <mche...@s-opensource.com>
 M: Mauro Carvalho Chehab <mche...@kernel.org>
diff --git a/drivers/staging/media/Kconfig b/drivers/staging/media/Kconfig
index 3a09140700e6..227437f22acf 100644
--- a/drivers/staging/media/Kconfig
+++ b/drivers/staging/media/Kconfig
@@ -31,4 +31,6 @@ source "drivers/staging/media/imx/Kconfig"
 
 source "drivers/staging/media/omap4iss/Kconfig"
 
+source "drivers/staging/media/tegra-vde/Kconfig"
+
 endif
diff --git a/drivers/staging/media/Makefile b/drivers/staging/media/Makefile
index f25327163c67..59a47f69884f 100644
--- a/drivers/staging/media/Makefile
+++ b/drivers/staging/media/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_VIDEO_IMX_MEDIA)   += imx/
 obj-$(CONFIG_VIDEO_DM365_VPFE) += davinci_vpfe/
 obj-$(CONFIG_VIDEO_OMAP4)  += omap4iss/
 obj-$(CONFIG_INTEL_ATOMISP) += atomisp/
+obj-$(CONFIG_TEGRA_VDE)+= tegra-vde/
diff --git a/drivers/staging/media/tegra-vde/Kconfig 
b/drivers/staging/media/tegra-vde/Kconfig
new file mode 100644
index ..ec3ebdaa
--- /dev/null
+++ b/drivers/staging/media/tegra-vde/Kconfig
@@ -0,0 +1,7 @@
+config TEGRA_VDE
+   tristate "NVIDIA Tegra Video Decoder Engine driver"
+   depends on ARCH_TEGRA || COMPILE_TEST
+   select SRAM
+   help
+   Say Y here to enable support for the NVIDIA Tegra video decoder
+   driver.
diff --git a/drivers/staging/media/tegra-vde/Makefile 
b/drivers/staging/media/tegra-vde/Makefile
new file mode 100644
index ..444c1d62daa1
--- /dev/null
+++ b/drivers/staging/media/tegra-vde/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_TEGRA_VDE)+= tegra-vde.o
diff --git a/drivers/staging/media/tegra-vde/TODO 
b/drivers/staging/media/tegra-vde/TODO
new file mode 100644
index ..31aaa3e66d80
--- /dev/null
+++ b/drivers/staging/media/tegra-vde/TODO
@@ -0,0 +1,4 @@
+TODO:
+   - Implement V4L2 API once it gains support for stateless decoders.
+
+Contact: Dmitry Osipenko <dig...@gmail.com>
diff --git a/drivers/staging/media/tegra-vde/tegra-vde.c 
b/drivers/staging/media/tegra-vde/tegra-vde.c
new file mode 100644
index ..c47659e96089
--- /dev/null
+++ b/drivers/staging/media/tegra-vde/tegra-vde.c
@@ -0,0 +1,1213 @@
+/*
+ * NVIDIA Tegra Video decoder driver
+ *
+ * Copyright (C) 2016-2017 Dmitry Osipenko <dig...@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "uapi.h"
+
+#define ICMDQUE_WR 0x00
+#define CMDQUE_CONTROL 0x08
+#define INTR_STATUS0x18
+#define BSE_INT_ENB0x40
+#define BSE_CONFIG 0x44
+
+#define BSE_ICMDQUE_EMPTY  BIT(3)
+#define BSE_DMA_BUSY   BIT(23)
+
+#define VDE_WR(__data, __addr) \
+do {   \
+   dev_dbg(vde->miscdev.parent,\
+   "%s: %d: 0x%08X => " #__addr "

Re: [PATCH v4 3/5] staging: Introduce NVIDIA Tegra video decoder driver

2017-12-10 Thread Dmitry Osipenko
On 10.12.2017 22:29, Nicolas Dufresne wrote:
> Le dimanche 10 décembre 2017 à 21:56 +0300, Dmitry Osipenko a écrit :
>>> I've CC-ed Maxime and Giulio as well: they are looking into adding support 
>>> for
>>> the stateless allwinner codec based on this code as well. There may well be
>>> opportunities for you to work together, esp. on the userspace side. Note 
>>> that
>>> Rockchip has the same issue, they too have a stateless HW codec.
>>
>> IIUC, we will have to define video decoder parameters in V4L API and then 
>> make a
>> V4L driver / userspace prototype (ffmpeg for example) that will use the 
>> requests
>> API for video decoding in order to upstream the requests API. Does it sound 
>> good?
> 
> Chromium/Chrome already have support for that type of decoder in their
> tree. In theory, it should just work.

Everything is possible, in theory ;)


Re: [PATCH v4 3/5] staging: Introduce NVIDIA Tegra video decoder driver

2017-12-10 Thread Dmitry Osipenko
On 05.12.2017 16:03, Hans Verkuil wrote:
> On 12/05/17 13:17, Dmitry Osipenko wrote:
>> Hi Hans,
>>
>> On 04.12.2017 17:04, Hans Verkuil wrote:
>>> Hi Dmitry,
>>>
>>> As you already mention in the TODO, this should become a v4l2 codec driver.
>>>
>>> Good existing examples are the coda, qcom/venus and mtk-vcodec drivers.
>>>
>>> One thing that is not clear from this code is if the tegra hardware is a
>>> stateful or stateless codec, i.e. does it keep track of the decoder state
>>> in the hardware, or does the application have to keep track of the state and
>>> provide the state information together with the video data?
>>>
>>> I ask because at the moment only stateful codecs are supported. Work is 
>>> ongoing
>>> to support stateless codecs, but we don't support that for now.
>>>
>>
>> It is stateless. Is there anything ready to try out? If yes, could you please
>> give a reference to that work?
> 
> I rebased my two year old 'requests2' branch to the latest mainline version 
> and
> gave it the imaginative name 'requests3':
> 
> https://git.linuxtv.org/hverkuil/media_tree.git/log/?h=requests3
> 
> (Note: only compile tested!)

Thank you very much.

> This is what ChromeOS has been using (actually they use a slightly older 
> version)
> and the new version that is currently being developed will be similar, so any 
> work
> you do on top of this will carry over to the final version without too much 
> effort.
> 
> At least, that's the intention :-)
> 
> I've CC-ed Maxime and Giulio as well: they are looking into adding support for
> the stateless allwinner codec based on this code as well. There may well be
> opportunities for you to work together, esp. on the userspace side. Note that
> Rockchip has the same issue, they too have a stateless HW codec.

IIUC, we will have to define video decoder parameters in V4L API and then make a
V4L driver / userspace prototype (ffmpeg for example) that will use the requests
API for video decoding in order to upstream the requests API. Does it sound 
good?

>>
>>> Anyway, I'm OK with merging this in staging. Although I think it should go
>>> to staging/media since we want to keep track of it.
>>>
>>
>> Awesome, I'll move driver to staging/media in V5. Thanks!
> 
> Nice, thanks!


Re: [PATCH v4 3/5] staging: Introduce NVIDIA Tegra video decoder driver

2017-12-05 Thread Dmitry Osipenko
Hi Hans,

On 04.12.2017 17:04, Hans Verkuil wrote:
> Hi Dmitry,
> 
> As you already mention in the TODO, this should become a v4l2 codec driver.
> 
> Good existing examples are the coda, qcom/venus and mtk-vcodec drivers.
> 
> One thing that is not clear from this code is if the tegra hardware is a
> stateful or stateless codec, i.e. does it keep track of the decoder state
> in the hardware, or does the application have to keep track of the state and
> provide the state information together with the video data?
> 
> I ask because at the moment only stateful codecs are supported. Work is 
> ongoing
> to support stateless codecs, but we don't support that for now.
> 

It is stateless. Is there anything ready to try out? If yes, could you please
give a reference to that work?

> Anyway, I'm OK with merging this in staging. Although I think it should go
> to staging/media since we want to keep track of it.
> 

Awesome, I'll move driver to staging/media in V5. Thanks!


Re: [PATCH v4 1/5] ARM: tegra: Add device tree node to describe IRAM

2017-11-12 Thread Dmitry Osipenko
On 11.11.2017 17:18, Vladimir Zapolskiy wrote:
> Hi Dmitry,
> 
> On 10/20/2017 12:34 AM, Dmitry Osipenko wrote:
>> From: Vladimir Zapolskiy <v...@mleia.com>
>>
>> All Tegra SoCs contain 256KiB IRAM, which is used to store CPU resume code
>> and by hardware engines like a video decoder.
>>
>> Signed-off-by: Vladimir Zapolskiy <v...@mleia.com>
> 
> Please also add your own closing "Signed-off-by" tag; please refer
> to "Developer's Certificate of Origin 1.1", point (c), which is found in
> Documentation/process/submitting-patches.rst
> 

Indeed, thanks!

>> ---
>>  arch/arm/boot/dts/tegra114.dtsi | 8 
>>  arch/arm/boot/dts/tegra124.dtsi | 8 
>>  arch/arm/boot/dts/tegra20.dtsi  | 8 
>>  arch/arm/boot/dts/tegra30.dtsi  | 8 
> 
> My assumption is that Thierry would prefer to get 4 separate patches,
> one for each platform, please split the patch.
> 

Thierry, would you?

> Also thanks for your time and your efforts applied to push my occasional
> change, please feel free to take your own authorship for 3 out of 4 patches.
> 

Okay.

>>  4 files changed, 32 insertions(+)
>>
>> diff --git a/arch/arm/boot/dts/tegra114.dtsi 
>> b/arch/arm/boot/dts/tegra114.dtsi
>> index 8932ea3afd5f..13f6087790c8 100644
>> --- a/arch/arm/boot/dts/tegra114.dtsi
>> +++ b/arch/arm/boot/dts/tegra114.dtsi
>> @@ -10,6 +10,14 @@
>>  compatible = "nvidia,tegra114";
>>  interrupt-parent = <>;
>>  
>> +iram@4000 {
>> +compatible = "mmio-sram";
> 
> Unfortunately Thierry hasn't yet replied, but my assumption is that
> the list of compatibles should be extended with one more SoC specific
> value like
> 
>   compatible = "nvidia,tegra114-sysram", "mmio-sram";
> 
> I'm not sure, if Tegra maintainers want to see a new compatible
> described in Documentation/devicetree/bindings.
> 

The custom compatible string shouldn't be needed. AFAIK, IRAM doesn't have any
exposed controls, so just a generic "mmio-sram" suits well here.

>> +reg = <0x4000 0x4>;
>> +#address-cells = <1>;
>> +#size-cells = <1>;
>> +ranges = <0 0x4000 0x4>;
>> +};
>> +
>>  host1x@5000 {
>>  compatible = "nvidia,tegra114-host1x", "simple-bus";
>>  reg = <0x5000 0x00028000>;
>> diff --git a/arch/arm/boot/dts/tegra124.dtsi 
>> b/arch/arm/boot/dts/tegra124.dtsi
>> index 8baf00b89efb..a3585ed82646 100644
>> --- a/arch/arm/boot/dts/tegra124.dtsi
>> +++ b/arch/arm/boot/dts/tegra124.dtsi
> 
> The considerations from above are applicable to the rest of
> the touched platforms.



Re: [PATCH v4 2/5] media: dt: bindings: Add binding for NVIDIA Tegra Video Decoder Engine

2017-11-12 Thread Dmitry Osipenko
On 11.11.2017 17:21, Vladimir Zapolskiy wrote:
> Hi Dmitry,
> 
> On 10/20/2017 12:34 AM, Dmitry Osipenko wrote:
>> Add binding documentation for the Video Decoder Engine which is found
>> on NVIDIA Tegra20/30/114/124/132 SoC's.
>>
>> Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
>> ---
>>  .../devicetree/bindings/media/nvidia,tegra-vde.txt | 55 
>> ++
>>  1 file changed, 55 insertions(+)
>>  create mode 100644 
>> Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
>>
>> diff --git a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt 
>> b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
>> new file mode 100644
>> index ..470237ed6fe5
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
>> @@ -0,0 +1,55 @@
>> +NVIDIA Tegra Video Decoder Engine
>> +
>> +Required properties:
>> +- compatible : Must contain one of the following values:
>> +   - "nvidia,tegra20-vde"
>> +   - "nvidia,tegra30-vde"
>> +   - "nvidia,tegra114-vde"
>> +   - "nvidia,tegra124-vde"
>> +   - "nvidia,tegra132-vde"
>> +- reg : Must contain an entry for each entry in reg-names.
>> +- reg-names : Must include the following entries:
>> +  - sxe
>> +  - bsev
>> +  - mbe
>> +  - ppe
>> +  - mce
>> +  - tfe
>> +  - ppb
>> +  - vdma
>> +  - frameid
> 
> I've already mentioned it in my review of the driver code, but the
> version from v3 with a single region is more preferable.
> 
> Also it implies that "reg-names" property will be removed.
> 

Please see my reply to the drivers code review.

>> +- iram : Must contain phandle to the mmio-sram device node that represents
>> + IRAM region used by VDE.
>> +- interrupts : Must contain an entry for each entry in interrupt-names.
>> +- interrupt-names : Must include the following entries:
>> +  - sync-token
>> +  - bsev
>> +  - sxe
>> +- clocks : Must include the following entries:
>> +  - vde
>> +- resets : Must include the following entries:
>> +  - vde
>> +
>> +Example:
>> +
>> +video-codec@6001a000 {
>> +compatible = "nvidia,tegra20-vde";
>> +reg = <0x6001a000 0x1000 /* Syntax Engine */
>> +   0x6001b000 0x1000 /* Video Bitstream Engine */
>> +   0x6001c000  0x100 /* Macroblock Engine */
>> +   0x6001c200  0x100 /* Post-processing Engine */
>> +   0x6001c400  0x100 /* Motion Compensation Engine */
>> +   0x6001c600  0x100 /* Transform Engine */
>> +   0x6001c800  0x100 /* Pixel prediction block */
>> +   0x6001ca00  0x100 /* Video DMA */
>> +   0x6001d800  0x300 /* Video frame controls */>;
>> +reg-names = "sxe", "bsev", "mbe", "ppe", "mce",
>> +"tfe", "ppb", "vdma", "frameid";
>> +iram = <_pool>; /* IRAM region */
>> +interrupts = , /* Sync token interrupt 
>> */
>> + , /* BSE-V interrupt */
>> + ; /* SXE interrupt */
>> +interrupt-names = "sync-token", "bsev", "sxe";
>> +clocks = <_car TEGRA20_CLK_VDE>;
>> +resets = <_car 61>;
>> +};
>>



Re: [PATCH v4 3/5] staging: Introduce NVIDIA Tegra video decoder driver

2017-11-12 Thread Dmitry Osipenko
On 11.11.2017 17:06, Vladimir Zapolskiy wrote:
> Hi Dmitry,
> 
> I'll add just a couple of minor comments, in general the code looks
> very good.
> 

Thank you very much for the review!

> On 10/20/2017 12:34 AM, Dmitry Osipenko wrote:
>> NVIDIA Tegra20/30/114/124/132 SoC's have video decoder engine that
>> supports standard set of video formats like H.264 / MPEG-4 / WMV / VC1.
>> Currently implemented decoding of CAVLC H.264 on Tegra20 only.
>>
>> Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
> 
> [snip]
> 
>> +++ b/drivers/staging/tegra-vde/uapi.h
>> @@ -0,0 +1,101 @@
>> +/*
>> + * Copyright (C) 2016-2017 Dmitry Osipenko <dig...@gmail.com>
>> + * All Rights Reserved.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the 
>> "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>> OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
> 
> From the specified MODULE_LICENSE("GPL") I'd rather expect to see a reference
> to GPLv2+ license in the header, and here the text resembles MIT license only.
> 
> I understand that it is a UAPI header file and it may happen that different
> rules are applied to this kind of sources, hopefully Greg can give the right
> directions.

Indeed, I probably copied the license text from some other UAPI header without
putting much thought into it. Will change it to GPL, thanks.

> 
> In general you may avoid the headache with the custom UAPI, if you reuse
> V4L2 interfaces, if I remember correctly drivers/media/platform/coda does it.
> Also from my point of view the custom UAPI is the only reason why the driver
> is pushed to the staging folder.

Thanks for the pointer. I see that coda driver does some raw bitstream parsing
in the driver, which is a bit icky, but probably is a good enough variant. I'll
take a closer look at implementing V4L interface at some point later, meanwhile
custom UAPI + VDPAU userspace serves us pretty well.

> 
> [snip]
> 
>> +struct tegra_vde {
>> +void __iomem *sxe;
>> +void __iomem *bsev;
>> +void __iomem *mbe;
>> +void __iomem *ppe;
>> +void __iomem *mce;
>> +void __iomem *tfe;
>> +void __iomem *ppb;
>> +void __iomem *vdma;
>> +void __iomem *frameid;
> 
> Please find a comment in tegra_vde_probe() function regarding
> devm_ioremap_resource() calls.
> 
>> +struct mutex lock;
>> +struct miscdevice miscdev;
>> +struct reset_control *rst;
>> +struct gen_pool *iram_pool;
>> +struct completion decode_completion;
>> +struct clk *clk;
>> +dma_addr_t iram_lists_addr;
>> +u32 *iram;
>> +};
> 
> [snip]
> 
>> +static int tegra_vde_wait_bsev(struct tegra_vde *vde, bool wait_dma)
>> +{
>> +struct device *dev = vde->miscdev.parent;
>> +u32 value;
>> +int err;
>> +
>> +err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
>> + !(value & BIT(2)), 1, 100);
>> +if (err) {
>> +dev_err(dev, "BSEV unknown bit timeout\n");
>> +return err;
>> +}
>> +
>> +err = readl_relaxed_poll_timeout(vde->bsev + INTR_STATUS, value,
>> + (value & BSE_ICMDQUE_EMPTY), 1, 100);
>> +if (err) {
>> +dev_err(dev, "BSEV ICMDQUE flush timeout\n");
>> +return err;
>> +}
>> +
>> +if (!wait_dma)
>> +return 0;
>> +
>> +err = readl_relaxed_p

[PATCH v4 1/5] ARM: tegra: Add device tree node to describe IRAM

2017-10-19 Thread Dmitry Osipenko
From: Vladimir Zapolskiy 

All Tegra SoCs contain 256KiB IRAM, which is used to store CPU resume code
and by hardware engines like a video decoder.

Signed-off-by: Vladimir Zapolskiy 
---
 arch/arm/boot/dts/tegra114.dtsi | 8 
 arch/arm/boot/dts/tegra124.dtsi | 8 
 arch/arm/boot/dts/tegra20.dtsi  | 8 
 arch/arm/boot/dts/tegra30.dtsi  | 8 
 4 files changed, 32 insertions(+)

diff --git a/arch/arm/boot/dts/tegra114.dtsi b/arch/arm/boot/dts/tegra114.dtsi
index 8932ea3afd5f..13f6087790c8 100644
--- a/arch/arm/boot/dts/tegra114.dtsi
+++ b/arch/arm/boot/dts/tegra114.dtsi
@@ -10,6 +10,14 @@
compatible = "nvidia,tegra114";
interrupt-parent = <>;
 
+   iram@4000 {
+   compatible = "mmio-sram";
+   reg = <0x4000 0x4>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   ranges = <0 0x4000 0x4>;
+   };
+
host1x@5000 {
compatible = "nvidia,tegra114-host1x", "simple-bus";
reg = <0x5000 0x00028000>;
diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi
index 8baf00b89efb..a3585ed82646 100644
--- a/arch/arm/boot/dts/tegra124.dtsi
+++ b/arch/arm/boot/dts/tegra124.dtsi
@@ -14,6 +14,14 @@
#address-cells = <2>;
#size-cells = <2>;
 
+   iram@4000 {
+   compatible = "mmio-sram";
+   reg = <0x0 0x4000 0x0 0x4>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   ranges = <0 0x0 0x4000 0x4>;
+   };
+
pcie@1003000 {
compatible = "nvidia,tegra124-pcie";
device_type = "pci";
diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index 7c85f97f72ea..aaf32f96f1e8 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -9,6 +9,14 @@
compatible = "nvidia,tegra20";
interrupt-parent = <>;
 
+   iram@4000 {
+   compatible = "mmio-sram";
+   reg = <0x4000 0x4>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   ranges = <0 0x4000 0x4>;
+   };
+
host1x@5000 {
compatible = "nvidia,tegra20-host1x", "simple-bus";
reg = <0x5000 0x00024000>;
diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi
index 13960fda7471..3b447c64bf69 100644
--- a/arch/arm/boot/dts/tegra30.dtsi
+++ b/arch/arm/boot/dts/tegra30.dtsi
@@ -10,6 +10,14 @@
compatible = "nvidia,tegra30";
interrupt-parent = <>;
 
+   iram@4000 {
+   compatible = "mmio-sram";
+   reg = <0x4000 0x4>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   ranges = <0 0x4000 0x4>;
+   };
+
pcie@3000 {
compatible = "nvidia,tegra30-pcie";
device_type = "pci";
-- 
2.14.2



[PATCH v4 4/5] ARM: dts: tegra20: Add video decoder node

2017-10-19 Thread Dmitry Osipenko
Add Video Decoder Engine device node.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 arch/arm/boot/dts/tegra20.dtsi | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index aaf32f96f1e8..6b2d7bf5c707 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -15,6 +15,11 @@
#address-cells = <1>;
#size-cells = <1>;
ranges = <0 0x4000 0x4>;
+
+   vde_pool: vde {
+   reg = <0x400 0x3fc00>;
+   pool;
+   };
};
 
host1x@5000 {
@@ -257,6 +262,28 @@
*/
};
 
+   vde@6001a000 {
+   compatible = "nvidia,tegra20-vde";
+   reg = <0x6001a000 0x1000   /* Syntax Engine */
+  0x6001b000 0x1000   /* Video Bitstream Engine */
+  0x6001c000  0x100   /* Macroblock Engine */
+  0x6001c200  0x100   /* Post-processing Engine */
+  0x6001c400  0x100   /* Motion Compensation Engine */
+  0x6001c600  0x100   /* Transform Engine */
+  0x6001c800  0x100   /* Pixel prediction block */
+  0x6001ca00  0x100   /* Video DMA */
+  0x6001d800  0x300>; /* Video frame controls */
+   reg-names = "sxe", "bsev", "mbe", "ppe", "mce",
+   "tfe", "ppb", "vdma", "frameid";
+   iram = <&vde_pool>; /* IRAM region */
+   interrupts = <GIC_SPI  9 IRQ_TYPE_LEVEL_HIGH>, /* Sync token interrupt */
+                <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, /* BSE-V interrupt */
+                <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; /* SXE interrupt */
+   interrupt-names = "sync-token", "bsev", "sxe";
+   clocks = <&tegra_car TEGRA20_CLK_VDE>;
+   resets = <&tegra_car 61>;
+   };
+
apbmisc@7800 {
compatible = "nvidia,tegra20-apbmisc";
reg = <0x7800 0x64   /* Chip revision */
-- 
2.14.2



[PATCH v4 3/5] staging: Introduce NVIDIA Tegra video decoder driver

2017-10-19 Thread Dmitry Osipenko
NVIDIA Tegra20/30/114/124/132 SoCs have a video decoder engine that
supports a standard set of video formats like H.264 / MPEG-4 / WMV / VC1.
Currently only decoding of CAVLC H.264 on Tegra20 is implemented.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 drivers/staging/Kconfig|2 +
 drivers/staging/Makefile   |1 +
 drivers/staging/tegra-vde/Kconfig  |7 +
 drivers/staging/tegra-vde/Makefile |1 +
 drivers/staging/tegra-vde/TODO |5 +
 drivers/staging/tegra-vde/uapi.h   |  101 +++
 drivers/staging/tegra-vde/vde.c| 1209 
 7 files changed, 1326 insertions(+)
 create mode 100644 drivers/staging/tegra-vde/Kconfig
 create mode 100644 drivers/staging/tegra-vde/Makefile
 create mode 100644 drivers/staging/tegra-vde/TODO
 create mode 100644 drivers/staging/tegra-vde/uapi.h
 create mode 100644 drivers/staging/tegra-vde/vde.c

diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 554683912cff..10c982811093 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -118,4 +118,6 @@ source "drivers/staging/vboxvideo/Kconfig"
 
 source "drivers/staging/pi433/Kconfig"
 
+source "drivers/staging/tegra-vde/Kconfig"
+
 endif # STAGING
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 8951c37d8d80..c5ef39767f22 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -49,3 +49,4 @@ obj-$(CONFIG_BCM2835_VCHIQ)   += vc04_services/
 obj-$(CONFIG_CRYPTO_DEV_CCREE) += ccree/
 obj-$(CONFIG_DRM_VBOXVIDEO)+= vboxvideo/
 obj-$(CONFIG_PI433)+= pi433/
+obj-$(CONFIG_TEGRA_VDE)+= tegra-vde/
diff --git a/drivers/staging/tegra-vde/Kconfig 
b/drivers/staging/tegra-vde/Kconfig
new file mode 100644
index ..ec3ebdaa
--- /dev/null
+++ b/drivers/staging/tegra-vde/Kconfig
@@ -0,0 +1,7 @@
+config TEGRA_VDE
+   tristate "NVIDIA Tegra Video Decoder Engine driver"
+   depends on ARCH_TEGRA || COMPILE_TEST
+   select SRAM
+   help
+   Say Y here to enable support for the NVIDIA Tegra video decoder
+   driver.
diff --git a/drivers/staging/tegra-vde/Makefile 
b/drivers/staging/tegra-vde/Makefile
new file mode 100644
index ..e7c0df1174bf
--- /dev/null
+++ b/drivers/staging/tegra-vde/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_TEGRA_VDE)+= vde.o
diff --git a/drivers/staging/tegra-vde/TODO b/drivers/staging/tegra-vde/TODO
new file mode 100644
index ..e98bbc7b3c19
--- /dev/null
+++ b/drivers/staging/tegra-vde/TODO
@@ -0,0 +1,5 @@
+TODO:
+   - Figure out how generic V4L2 API could be utilized by this driver,
+ implement it.
+
+Contact: Dmitry Osipenko <dig...@gmail.com>
diff --git a/drivers/staging/tegra-vde/uapi.h b/drivers/staging/tegra-vde/uapi.h
new file mode 100644
index ..8502032b5ee2
--- /dev/null
+++ b/drivers/staging/tegra-vde/uapi.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2016-2017 Dmitry Osipenko <dig...@gmail.com>
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _UAPI_TEGRA_VDE_H_
+#define _UAPI_TEGRA_VDE_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define FLAG_B_FRAME   (1 << 0)
+#define FLAG_REFERENCE (1 << 1)
+
+struct tegra_vde_h264_frame {
+   __s32 y_fd;
+   __s32 cb_fd;
+   __s32 cr_fd;
+   __s32 aux_fd;
+   __u32 y_offset;
+   __u32 cb_offset;
+   __u32 cr_offset;
+   __u32 aux_offset;
+   __u32 frame_num;
+   __u32 flags;
+
+   __u32 reserved;
+} __attribute__((packed));
+
+struct tegra_vde_h264_decoder_ctx {
+   __s32 bitstream_data_fd;
+   __u32 bitstream_data_offset;
+
+   __u64 dpb_frames_ptr;
+   __u8  dpb_frames_nb;
+   __u8  dpb_ref_

[PATCH v4 5/5] ARM: defconfig: tegra: Enable Video Decoder driver

2017-10-19 Thread Dmitry Osipenko
Compile Tegra VDE driver as a module.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 arch/arm/configs/tegra_defconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index 6678f2929356..c931bd48f7fd 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig
@@ -29,7 +29,6 @@ CONFIG_PCI_MSI=y
 CONFIG_PCI_TEGRA=y
 CONFIG_SMP=y
 CONFIG_PREEMPT=y
-CONFIG_AEABI=y
 CONFIG_HIGHMEM=y
 CONFIG_CMA=y
 CONFIG_ZBOOT_ROM_TEXT=0x0
@@ -256,6 +255,7 @@ CONFIG_KEYBOARD_NVEC=y
 CONFIG_SERIO_NVEC_PS2=y
 CONFIG_NVEC_POWER=y
 CONFIG_NVEC_PAZ00=y
+CONFIG_TEGRA_VDE=m
 CONFIG_TEGRA_IOMMU_GART=y
 CONFIG_TEGRA_IOMMU_SMMU=y
 CONFIG_ARCH_TEGRA_2x_SOC=y
-- 
2.14.2



[PATCH v4 0/5] NVIDIA Tegra20 video decoder driver

2017-10-19 Thread Dmitry Osipenko
The VDE driver provides accelerated video decoding on NVIDIA Tegra SoCs;
it is the result of reverse-engineering efforts. The driver has been tested on
the Toshiba AC100 and Acer A500, and it should work on any Tegra20 device.

In userspace this driver is utilized by libvdpau-tegra [0] that implements
VDPAU interface, so any video player that supports VDPAU can provide
accelerated video decoding on Tegra20 on Linux.

[0] https://github.com/grate-driver/libvdpau-tegra

Change log:
v4:
- Added mmio-sram "IRAM DT node" patch from Vladimir Zapolskiy to
  the series, I modified it to cover all Tegra's and not only Tegra20
- Utilized genalloc for the reservation of IRAM region as per
  Vladimir's suggestion, VDE driver now selects SRAM driver in Kconfig
- Added defconfig patch to the series
- Described VDE registers in DT per HW unit, excluding BSE-A / UCQ
  and holes between the units
- Extended DT compatibility property with Tegra30/114/124/132 in the
  binding doc.
- Removed BSE-A interrupt from the DT binding because it's very
  likely that Audio Bitstream Engine isn't integrated with VDE
- Removed UCQ interrupt from the DT binding because in TRM it is
  represented as a distinct HW block that probably should have
  its own driver
- Addressed v3 review comments: factored out DT binding addition
  into a standalone patch, moved binding to media/, removed
  clocks/resets-names

v3:
- Suppressed compilation warnings reported by 'kbuild test robot'

v2:
- Addressed v1 review comments from Stephen Warren and Dan Carpenter
- Implemented runtime PM
- Miscellaneous code cleanups
- Changed 'TODO'
- CC'd media maintainers for the review as per Greg K-H's request,
  v1 can be viewed at https://lkml.org/lkml/2017/9/25/606

Dmitry Osipenko (4):
  media: dt: bindings: Add binding for NVIDIA Tegra Video Decoder Engine
  staging: Introduce NVIDIA Tegra video decoder driver
  ARM: dts: tegra20: Add video decoder node
  ARM: defconfig: tegra: Enable Video Decoder driver

Vladimir Zapolskiy (1):
  ARM: tegra: Add device tree node to describe IRAM

 .../devicetree/bindings/media/nvidia,tegra-vde.txt |   55 +
 arch/arm/boot/dts/tegra114.dtsi|8 +
 arch/arm/boot/dts/tegra124.dtsi|8 +
 arch/arm/boot/dts/tegra20.dtsi |   35 +
 arch/arm/boot/dts/tegra30.dtsi |8 +
 arch/arm/configs/tegra_defconfig   |2 +-
 drivers/staging/Kconfig|2 +
 drivers/staging/Makefile   |1 +
 drivers/staging/tegra-vde/Kconfig  |7 +
 drivers/staging/tegra-vde/Makefile |1 +
 drivers/staging/tegra-vde/TODO |5 +
 drivers/staging/tegra-vde/uapi.h   |  101 ++
 drivers/staging/tegra-vde/vde.c| 1209 
 13 files changed, 1441 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
 create mode 100644 drivers/staging/tegra-vde/Kconfig
 create mode 100644 drivers/staging/tegra-vde/Makefile
 create mode 100644 drivers/staging/tegra-vde/TODO
 create mode 100644 drivers/staging/tegra-vde/uapi.h
 create mode 100644 drivers/staging/tegra-vde/vde.c

-- 
2.14.2



[PATCH v4 2/5] media: dt: bindings: Add binding for NVIDIA Tegra Video Decoder Engine

2017-10-19 Thread Dmitry Osipenko
Add binding documentation for the Video Decoder Engine which is found
on NVIDIA Tegra20/30/114/124/132 SoC's.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 .../devicetree/bindings/media/nvidia,tegra-vde.txt | 55 ++
 1 file changed, 55 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt

diff --git a/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt 
b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
new file mode 100644
index ..470237ed6fe5
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
@@ -0,0 +1,55 @@
+NVIDIA Tegra Video Decoder Engine
+
+Required properties:
+- compatible : Must contain one of the following values:
+   - "nvidia,tegra20-vde"
+   - "nvidia,tegra30-vde"
+   - "nvidia,tegra114-vde"
+   - "nvidia,tegra124-vde"
+   - "nvidia,tegra132-vde"
+- reg : Must contain an entry for each entry in reg-names.
+- reg-names : Must include the following entries:
+  - sxe
+  - bsev
+  - mbe
+  - ppe
+  - mce
+  - tfe
+  - ppb
+  - vdma
+  - frameid
+- iram : Must contain phandle to the mmio-sram device node that represents
+ IRAM region used by VDE.
+- interrupts : Must contain an entry for each entry in interrupt-names.
+- interrupt-names : Must include the following entries:
+  - sync-token
+  - bsev
+  - sxe
+- clocks : Must include the following entries:
+  - vde
+- resets : Must include the following entries:
+  - vde
+
+Example:
+
+video-codec@6001a000 {
+   compatible = "nvidia,tegra20-vde";
+   reg = <0x6001a000 0x1000 /* Syntax Engine */
+  0x6001b000 0x1000 /* Video Bitstream Engine */
+  0x6001c000  0x100 /* Macroblock Engine */
+  0x6001c200  0x100 /* Post-processing Engine */
+  0x6001c400  0x100 /* Motion Compensation Engine */
+  0x6001c600  0x100 /* Transform Engine */
+  0x6001c800  0x100 /* Pixel prediction block */
+  0x6001ca00  0x100 /* Video DMA */
+  0x6001d800  0x300 /* Video frame controls */>;
+   reg-names = "sxe", "bsev", "mbe", "ppe", "mce",
+   "tfe", "ppb", "vdma", "frameid";
+   iram = <&vde_pool>; /* IRAM region */
+   interrupts = <GIC_SPI  9 IRQ_TYPE_LEVEL_HIGH>, /* Sync token interrupt */
+                <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, /* BSE-V interrupt */
+                <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; /* SXE interrupt */
+   interrupt-names = "sync-token", "bsev", "sxe";
+   clocks = <&tegra_car TEGRA20_CLK_VDE>;
+   resets = <&tegra_car 61>;
+};
-- 
2.14.2



Re: [PATCH v3 1/2] staging: Introduce NVIDIA Tegra20 video decoder driver

2017-10-17 Thread Dmitry Osipenko
On 18.10.2017 00:13, Rob Herring wrote:
> On Tue, Oct 17, 2017 at 3:24 PM, Thierry Reding
>  wrote:
>> On Tue, Oct 17, 2017 at 03:13:54PM -0500, Rob Herring wrote:
>> [...]
>>>> diff --git 
>>>> a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt 
>>>> b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt
>> [...]
>>>> +- resets : Must contain an entry for each entry in reset-names.
>>>> +  See ../reset/reset.txt for details.
>>>> +- reset-names : Must include the following entries:
>>>> +  - vde
>>>
>>> -names is pointless when there is only one.
>>
>> I'd prefer to keep it. In the past we occasionally had to add clocks or
>> resets to a device tree node where only one had been present (and hence
>> no -names property) and that caused some awkwardness because verbiage
>> had to be added to the bindings that clarified that one particular entry
>> (the original one) always had to come first.
> 
> The order should be specified regardless of -names and the original
> one has to come first if you add any. That's not awkwardness, but how
> bindings work.

Probably it would be okay to remove '-names' from the binding doc, but keep them
in the actual DT, wouldn't it?


Re: [PATCH v3 2/2] ARM: dts: tegra20: Add video decoder node

2017-10-12 Thread Dmitry Osipenko
Hello Vladimir,

On 12.10.2017 10:43, Vladimir Zapolskiy wrote:
> Hello Dmitry,
> 
> On 10/11/2017 11:08 PM, Dmitry Osipenko wrote:
>> Add a device node for the video decoder engine found on Tegra20.
>>
>> Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
>> ---
>>  arch/arm/boot/dts/tegra20.dtsi | 17 +
>>  1 file changed, 17 insertions(+)
>>
>> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
>> index 7c85f97f72ea..1b5d54b6c0cb 100644
>> --- a/arch/arm/boot/dts/tegra20.dtsi
>> +++ b/arch/arm/boot/dts/tegra20.dtsi
>> @@ -249,6 +249,23 @@
>>  */
>>  };
>>  
>> +vde@6001a000 {
>> +compatible = "nvidia,tegra20-vde";
>> +reg = <0x6001a000 0x3D00/* VDE registers */
>> +   0x4400 0x3FC00>; /* IRAM region */
> 
> this notation of a used region in IRAM is non-standard and potentially it
> may lead to conflicts for IRAM resource between users.
> 
> My proposal is to add a valid device tree node to describe an IRAM region
> firstly, then reserve a subregion in it by using a new "iram" property.
> 

The IRAM region defined in the DT for VDE isn't exactly correct; it should
actually be much smaller. I don't know exactly which parts of IRAM VDE uses, so
for now it is just safer to assign the rest of the IRAM region to VDE.

I'm not sure whether it is really worth using a dynamic allocator for a single
static allocation, but maybe it would come in handy later. Stephen / Jon /
Thierry, what do you think?

> 8<
> From: Vladimir Zapolskiy <v...@mleia.com>
> Date: Thu, 12 Oct 2017 10:25:45 +0300
> Subject: [PATCH] ARM: tegra: add device tree node to describe IRAM on Tegra20
> 
> All Tegra20 SoCs contain 256KiB IRAM, which is used to store
> resume code and by a video decoder engine.
> 
> Signed-off-by: Vladimir Zapolskiy <v...@mleia.com>
> ---
>  arch/arm/boot/dts/tegra20.dtsi | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
> index 7c85f97f72ea..fd2843c90920 100644
> --- a/arch/arm/boot/dts/tegra20.dtsi
> +++ b/arch/arm/boot/dts/tegra20.dtsi
> @@ -9,6 +9,14 @@
>   compatible = "nvidia,tegra20";
>   interrupt-parent = <>;
>  
> + iram@4000 {
> + compatible = "mmio-sram";
> + reg = <0x4000 0x4>;
> + #address-cells = <1>;
> + #size-cells = <1>;
> + ranges = <0 0x4000 0x4>;
> + };
> +
>   host1x@5000 {
>   compatible = "nvidia,tegra20-host1x", "simple-bus";
>   reg = <0x5000 0x00024000>;
> 8<
> 
> Please add the change above to your next version of the series, or
> if you wish I can send it separately for review by Thierry.
> 
> After applying that change you do define a region in IRAM for the exclusive
> usage by a video decoder engine and add an 'iram' property:
> 

Newer Tegra generations also have the IRAM, so I think Tegra30/114/124 DT's
should also include the same IRAM node for consistency. I'll extend your patch
to cover other Tegra's and include it in v4 if you don't mind and if Stephen /
Jon / Thierry would approve your proposal.

> 8<
> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
> index fd2843c90920..5133fbac2185 100644
> --- a/arch/arm/boot/dts/tegra20.dtsi
> +++ b/arch/arm/boot/dts/tegra20.dtsi
> @@ -15,6 +15,11 @@
>   #address-cells = <1>;
>   #size-cells = <1>;
>   ranges = <0 0x4000 0x4>;
> +
> + vde_pool: vde {
> + reg = <0x400 0x3fc00>;
> + pool;
> + };
>   };
>  
>   host1x@5000 {
> [snip]
> 
> + vde@6001a000 {
> + compatible = "nvidia,tegra20-vde";
> + reg = <0x6001a000 0x3d00>;  /* VDE registers */
> + iram = <&vde_pool>; /* IRAM region */
> [snip]
> 8<
> 
> And finally in the driver you'll use genalloc API to access the IRAM
> region, for that you can find ready examples in the kernel source code.
> 

Thank you very much for taking a look at the patch!


Re: [PATCH v3 2/2] ARM: dts: tegra20: Add video decoder node

2017-10-12 Thread Dmitry Osipenko
On 12.10.2017 13:57, Jon Hunter wrote:
> 
> On 12/10/17 11:51, Dmitry Osipenko wrote:
>> On 12.10.2017 11:49, Jon Hunter wrote:
>>>
>>> On 11/10/17 21:08, Dmitry Osipenko wrote:
>>>> Add a device node for the video decoder engine found on Tegra20.
>>>>
>>>> Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
>>>> ---
>>>>  arch/arm/boot/dts/tegra20.dtsi | 17 +
>>>>  1 file changed, 17 insertions(+)
>>>>
>>>> diff --git a/arch/arm/boot/dts/tegra20.dtsi 
>>>> b/arch/arm/boot/dts/tegra20.dtsi
>>>> index 7c85f97f72ea..1b5d54b6c0cb 100644
>>>> --- a/arch/arm/boot/dts/tegra20.dtsi
>>>> +++ b/arch/arm/boot/dts/tegra20.dtsi
>>>> @@ -249,6 +249,23 @@
>>>>*/
>>>>};
>>>>  
>>>> +  vde@6001a000 {
>>>> +  compatible = "nvidia,tegra20-vde";
>>>> +  reg = <0x6001a000 0x3D00/* VDE registers */
>>>> + 0x4400 0x3FC00>; /* IRAM region */
>>>> +  reg-names = "regs", "iram";
>>>> +  interrupts = , /* UCQ error 
>>>> interrupt */
>>>> +   , /* Sync token 
>>>> interrupt */
>>>> +   , /* BSE-V 
>>>> interrupt */
>>>> +   , /* BSE-A 
>>>> interrupt */
>>>> +   ; /* SXE interrupt 
>>>> */
>>>> +  interrupt-names = "ucq-error", "sync-token", "bsev", "bsea", 
>>>> "sxe";
>>>> +  clocks = <_car TEGRA20_CLK_VDE>;
>>>> +  clock-names = "vde";
>>>> +  resets = <_car 61>;
>>>> +  reset-names = "vde";
>>>> +  };
>>>> +
>>>
>>> I don't see any binding documentation for this node. We need to make
>>> sure we add this.
>>>
>>
>> It's in the first patch.
>>
>> +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt
>>
> 
> Ah yes indeed, then that needs to be a separate patch.
> 

Okay


Re: [PATCH v3 2/2] ARM: dts: tegra20: Add video decoder node

2017-10-12 Thread Dmitry Osipenko
On 12.10.2017 11:49, Jon Hunter wrote:
> 
> On 11/10/17 21:08, Dmitry Osipenko wrote:
>> Add a device node for the video decoder engine found on Tegra20.
>>
>> Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
>> ---
>>  arch/arm/boot/dts/tegra20.dtsi | 17 +
>>  1 file changed, 17 insertions(+)
>>
>> diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
>> index 7c85f97f72ea..1b5d54b6c0cb 100644
>> --- a/arch/arm/boot/dts/tegra20.dtsi
>> +++ b/arch/arm/boot/dts/tegra20.dtsi
>> @@ -249,6 +249,23 @@
>>  */
>>  };
>>  
>> +vde@6001a000 {
>> +compatible = "nvidia,tegra20-vde";
>> +reg = <0x6001a000 0x3D00/* VDE registers */
>> +   0x4400 0x3FC00>; /* IRAM region */
>> +reg-names = "regs", "iram";
>> +interrupts = , /* UCQ error 
>> interrupt */
>> + , /* Sync token 
>> interrupt */
>> + , /* BSE-V 
>> interrupt */
>> + , /* BSE-A 
>> interrupt */
>> + ; /* SXE interrupt 
>> */
>> +interrupt-names = "ucq-error", "sync-token", "bsev", "bsea", 
>> "sxe";
>> +clocks = <_car TEGRA20_CLK_VDE>;
>> +clock-names = "vde";
>> +resets = <_car 61>;
>> +reset-names = "vde";
>> +};
>> +
> 
> I don't see any binding documentation for this node. We need to make
> sure we add this.
> 

It's in the first patch.

+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt


Re: [PATCH v3 1/2] staging: Introduce NVIDIA Tegra20 video decoder driver

2017-10-11 Thread Dmitry Osipenko
On 11.10.2017 23:47, Nicolas Dufresne wrote:
> Le mercredi 11 octobre 2017 à 23:08 +0300, Dmitry Osipenko a écrit :
>> diff --git a/drivers/staging/tegra-vde/TODO b/drivers/staging/tegra-
>> vde/TODO
>> new file mode 100644
>> index ..e98bbc7b3c19
>> --- /dev/null
>> +++ b/drivers/staging/tegra-vde/TODO
>> @@ -0,0 +1,5 @@
>> +TODO:
>> +   - Figure out how generic V4L2 API could be utilized by this
>> driver,
>> + implement it.
>> +
> 
> That is a very interesting effort, I think it's the first time someone
> is proposing an upstream driver for a Tegra platform.

Thanks!

> When I look at
> tegra_vde_h264_decoder_ctx, it looks like the only thing that the HW is
> not parsing is the media header (pps/sps). Is that correct?
> 

That's correct. I think it's quite common among embedded (mobile) and
desktop-grade decoders to require some auxiliary info from the media headers.

> I wonder how acceptable it would be to parse this inside the driver. It
> is no more complex than parsing an EDID. If that was possible, wrapping
> this driver as a v4l2 mem2mem should be rather simple. As a side
> effect, you'll automatically get some userspace working, notably
> GStreamer and FFmpeg.
> 

Parsing bitstream in kernel feels a bit dirty, although it's up to media
maintainers to decide.

> In case even parsing the headers is too much from a kernel point
> of view, I think you should have a look at the following effort.
> It's a proposal based on the yet-to-be-merged Request API. Hugues is also
> proposing a libv4l2 adapter that makes the driver look like a normal
> v4l2 m2m, hiding all the userspace parsing and table filling. This,
> though, is a long-term plan to integrate state-less or parser-less
> encoders into linux-media. It seems rather overkill for a state-full
> driver that requires parsed headers like PPS/SPS.
> 
> https://lwn.net/Articles/720797/
> 

I'll take a look at the Request API / libv4l2 adapter, thank you very much for
pointing to it.


[PATCH v3 1/2] staging: Introduce NVIDIA Tegra20 video decoder driver

2017-10-11 Thread Dmitry Osipenko
Video decoder, found on NVIDIA Tegra20 SoC, supports a standard set of
video formats like H.264 / MPEG-4 / WMV / VC1. Currently driver supports
decoding of CAVLC H.264 only.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 .../bindings/arm/tegra/nvidia,tegra20-vde.txt  |   44 +
 drivers/staging/Kconfig|2 +
 drivers/staging/Makefile   |1 +
 drivers/staging/tegra-vde/Kconfig  |6 +
 drivers/staging/tegra-vde/Makefile |1 +
 drivers/staging/tegra-vde/TODO |5 +
 drivers/staging/tegra-vde/uapi.h   |  101 ++
 drivers/staging/tegra-vde/vde.c| 1109 
 8 files changed, 1269 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt
 create mode 100644 drivers/staging/tegra-vde/Kconfig
 create mode 100644 drivers/staging/tegra-vde/Makefile
 create mode 100644 drivers/staging/tegra-vde/TODO
 create mode 100644 drivers/staging/tegra-vde/uapi.h
 create mode 100644 drivers/staging/tegra-vde/vde.c

diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt 
b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt
new file mode 100644
index ..c3f847db8167
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt
@@ -0,0 +1,44 @@
+NVIDIA Tegra Video Decoder Engine
+
+Required properties:
+- compatible : "nvidia,tegra20-vde"
+- reg : Must contain 2 register ranges: registers and IRAM region that
+VDE uses for its internal needs and for passing some of decoding
+parameters.
+- reg-names : Must include the following entries:
+  - regs
+  - iram
+- interrupts : Must contain an entry for each entry in interrupt-names.
+- interrupt-names : Must include the following entries:
+  - ucq-error
+  - sync-token
+  - bsev
+  - bsea
+  - sxe
+- clocks : Must contain an entry for each entry in clock-names.
+  See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+  - vde
+- resets : Must contain an entry for each entry in reset-names.
+  See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+  - vde
+
+Example:
+
+vde@6001a000 {
+   compatible = "nvidia,tegra20-vde";
+   reg = <0x6001a000 0x3D00/* VDE registers */
+   0x4400 0x3FC00>; /* IRAM region */
+   reg-names = "regs", "iram";
+   interrupts = , /* UCQ error interrupt */
+   , /* Sync token 
interrupt */
+   , /* BSE-V interrupt */
+   , /* BSE-A interrupt */
+   ; /* SXE interrupt */
+   interrupt-names = "ucq-error", "sync-token", "bsev", "bsea", "sxe";
+   clocks = <_car TEGRA20_CLK_VDE>;
+   clock-names = "vde";
+   resets = <_car 61>;
+   reset-names = "vde";
+};
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 554683912cff..10c982811093 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -118,4 +118,6 @@ source "drivers/staging/vboxvideo/Kconfig"
 
 source "drivers/staging/pi433/Kconfig"
 
+source "drivers/staging/tegra-vde/Kconfig"
+
 endif # STAGING
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 8951c37d8d80..c5ef39767f22 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -49,3 +49,4 @@ obj-$(CONFIG_BCM2835_VCHIQ)   += vc04_services/
 obj-$(CONFIG_CRYPTO_DEV_CCREE) += ccree/
 obj-$(CONFIG_DRM_VBOXVIDEO)+= vboxvideo/
 obj-$(CONFIG_PI433)+= pi433/
+obj-$(CONFIG_TEGRA_VDE)+= tegra-vde/
diff --git a/drivers/staging/tegra-vde/Kconfig 
b/drivers/staging/tegra-vde/Kconfig
new file mode 100644
index ..730ee006de66
--- /dev/null
+++ b/drivers/staging/tegra-vde/Kconfig
@@ -0,0 +1,6 @@
+config TEGRA_VDE
+   tristate "NVIDIA Tegra Video Decoder Engine driver"
+   depends on ARCH_TEGRA_2x_SOC || COMPILE_TEST
+   help
+   Say Y here to enable support for the NVIDIA Tegra video decoder
+   driver.
diff --git a/drivers/staging/tegra-vde/Makefile 
b/drivers/staging/tegra-vde/Makefile
new file mode 100644
index ..e7c0df1174bf
--- /dev/null
+++ b/drivers/staging/tegra-vde/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_TEGRA_VDE)+= vde.o
diff --git a/drivers/staging/tegra-vde/TODO b/drivers/staging/tegra-vde/TODO
new file mode 100644
index ..e98bbc7b3c19
--- /dev/null
+++ b/drivers/staging/tegra-vde/TODO
@@ -0,0 +1,5 @@
+TODO:
+   - Figure out how generic V4L2 API could be utilized by this driver,
+ implement it.
+
+Contact: Dmitry Osipenko <dig...@gmail.com>
diff --git a/drivers/staging/tegra-vde/uapi

[PATCH v3 2/2] ARM: dts: tegra20: Add video decoder node

2017-10-11 Thread Dmitry Osipenko
Add a device node for the video decoder engine found on Tegra20.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 arch/arm/boot/dts/tegra20.dtsi | 17 +
 1 file changed, 17 insertions(+)

diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index 7c85f97f72ea..1b5d54b6c0cb 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -249,6 +249,23 @@
*/
};
 
+   vde@6001a000 {
+   compatible = "nvidia,tegra20-vde";
+   reg = <0x6001a000 0x3D00    /* VDE registers */
+  0x40000400 0x3FC00>; /* IRAM region */
+   reg-names = "regs", "iram";
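+   interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, /* UCQ error interrupt */
+                <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, /* Sync token interrupt */
+                <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, /* BSE-V interrupt */
+                <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, /* BSE-A interrupt */
+                <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; /* SXE interrupt */
+   interrupt-names = "ucq-error", "sync-token", "bsev", "bsea", "sxe";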
+   clocks = <&tegra_car TEGRA20_CLK_VDE>;
+   clock-names = "vde";
+   resets = <&tegra_car 61>;
+   reset-names = "vde";
+   };
+
apbmisc@7800 {
compatible = "nvidia,tegra20-apbmisc";
reg = <0x7800 0x64   /* Chip revision */
-- 
2.14.2



[PATCH v3 0/2] NVIDIA Tegra20 video decoder driver

2017-10-11 Thread Dmitry Osipenko
This driver provides accelerated video decoding on NVIDIA Tegra20 SoCs; it
is the result of reverse-engineering efforts. The driver has been tested on
the Toshiba AC100 and Acer A500 and should work on any Tegra20 device.

In userspace this driver is used by libvdpau-tegra [0], which implements
the VDPAU interface, so any video player that supports VDPAU can provide
accelerated video decoding on Tegra20 on Linux.

[0] https://github.com/grate-driver/libvdpau-tegra

Change log:
v3:
- Suppressed compilation warnings reported by 'kbuild test robot'

v2:
- Addressed v1 review comments from Stephen Warren and Dan Carpenter
- Implemented runtime PM
- Miscellaneous code cleanups
- Changed 'TODO'
- CC'd media maintainers for the review as per Greg K-H request,
  v1 can be viewed at https://lkml.org/lkml/2017/9/25/606

Dmitry Osipenko (2):
  staging: Introduce NVIDIA Tegra20 video decoder driver
  ARM: dts: tegra20: Add video decoder node

 .../bindings/arm/tegra/nvidia,tegra20-vde.txt  |   44 +
 arch/arm/boot/dts/tegra20.dtsi |   17 +
 drivers/staging/Kconfig|2 +
 drivers/staging/Makefile   |1 +
 drivers/staging/tegra-vde/Kconfig  |6 +
 drivers/staging/tegra-vde/Makefile |1 +
 drivers/staging/tegra-vde/TODO |5 +
 drivers/staging/tegra-vde/uapi.h   |  101 ++
 drivers/staging/tegra-vde/vde.c| 1109 
 9 files changed, 1286 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt
 create mode 100644 drivers/staging/tegra-vde/Kconfig
 create mode 100644 drivers/staging/tegra-vde/Makefile
 create mode 100644 drivers/staging/tegra-vde/TODO
 create mode 100644 drivers/staging/tegra-vde/uapi.h
 create mode 100644 drivers/staging/tegra-vde/vde.c

-- 
2.14.2



Re: [PATCH v2 1/2] staging: Introduce NVIDIA Tegra20 video decoder driver

2017-10-06 Thread Dmitry Osipenko
On 06.10.2017 06:57, kbuild test robot wrote:
> Hi Dmitry,
> 
> [auto build test WARNING on staging/staging-testing]
> [also build test WARNING on v4.14-rc3 next-20170929]
> [cannot apply to tegra/for-next]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improve the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Dmitry-Osipenko/staging-Introduce-NVIDIA-Tegra20-video-decoder-driver/20171006-101015
> config: ia64-allmodconfig (attached as .config)
> compiler: ia64-linux-gcc (GCC) 6.2.0
> reproduce:
> wget 
> https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
> ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> make.cross ARCH=ia64 
> 
> All warnings (new ones prefixed by >>):
> 
>In file included from include/linux/kernel.h:13:0,
> from include/linux/clk.h:16,
> from drivers/staging/tegra-vde/vde.c:11:
>drivers/staging/tegra-vde/vde.c: In function 'tegra_vde_setup_hw_context':
>>> drivers/staging/tegra-vde/vde.c:51:11: warning: format '%X' expects 
>>> argument of type 'unsigned int', but argument 5 has type 'long long 
>>> unsigned int' [-Wformat=]
>  pr_debug("%s: %d: 0x%08X => " #__addr ")\n", \
>   ^
>include/linux/printk.h:285:21: note: in definition of macro 'pr_fmt'
> #define pr_fmt(fmt) fmt
> ^~~
>include/linux/printk.h:333:2: note: in expansion of macro 
> 'dynamic_pr_debug'
>  dynamic_pr_debug(fmt, ##__VA_ARGS__)
>  ^~~~
>drivers/staging/tegra-vde/vde.c:51:2: note: in expansion of macro 
> 'pr_debug'
>  pr_debug("%s: %d: 0x%08X => " #__addr ")\n", \
>  ^~~~
>drivers/staging/tegra-vde/vde.c:362:2: note: in expansion of macro 'VDE_WR'
>  VDE_WR(bitstream_data_paddr + bitstream_data_size,
>  ^~
>>> drivers/staging/tegra-vde/vde.c:51:11: warning: format '%X' expects 
>>> argument of type 'unsigned int', but argument 5 has type 'phys_addr_t {aka 
>>> long long unsigned int}' [-Wformat=]
>  pr_debug("%s: %d: 0x%08X => " #__addr ")\n", \
>   ^
>include/linux/printk.h:285:21: note: in definition of macro 'pr_fmt'
> #define pr_fmt(fmt) fmt
> ^~~
>include/linux/printk.h:333:2: note: in expansion of macro 
> 'dynamic_pr_debug'
>  dynamic_pr_debug(fmt, ##__VA_ARGS__)
>  ^~~~
>drivers/staging/tegra-vde/vde.c:51:2: note: in expansion of macro 
> 'pr_debug'
>  pr_debug("%s: %d: 0x%08X => " #__addr ")\n", \
>  ^~~~
>drivers/staging/tegra-vde/vde.c:435:2: note: in expansion of macro 'VDE_WR'
>  VDE_WR(bitstream_data_paddr, vde->regs + SXE(0x6C));
>  ^~
>drivers/staging/tegra-vde/vde.c: In function 'tegra_vde_attach_dmabuf':
>drivers/staging/tegra-vde/vde.c:531:40: warning: format '%d' expects 
> argument of type 'int', but argument 3 has type 'size_t {aka long unsigned 
> int}' [-Wformat=]
>   dev_err(dev, "Too small dmabuf size %d @0x%lX, "
>^
>drivers/staging/tegra-vde/vde.c: In function 'tegra_vde_ioctl_decode_h264':
>drivers/staging/tegra-vde/vde.c:855:16: warning: format '%X' expects 
> argument of type 'unsigned int', but argument 3 has type 'phys_addr_t {aka 
> long long unsigned int}' [-Wformat=]
>   dev_err(dev, "Decoding failed, "
>^~~
> 
> vim +51 drivers/staging/tegra-vde/vde.c
> 
>   > 11#include 
> 12#include 
> 13#include 
> 14#include 
> 15#include 
> 16#include 
> 17#include 
> 18#include 
> 19#include 
> 20#include 
> 21#include 
> 22#include 
> 23#include 
> 24
> 25#include 
> 26
> 27#include "uapi.h"
> 28
> 29#define SXE(offt)   (0x + (offt)) /* Syntax 
> Engine */
> 30#define BSEV(offt)  (0x1000 + (offt)) /* Video 
> Bitstream Engine */
> 31#define MBE(offt)   (0x2000 + (offt)) /* Macroblock 
> Engine */
> 32#define PPE(offt)   (0x2200 + (offt)) /* 
> Post-processing Engine */
> 33#define MCE(offt)   (0x2400 + (offt)) /* Motion 
> Compensation Eng. */
> 34

[PATCH v2 1/2] staging: Introduce NVIDIA Tegra20 video decoder driver

2017-10-03 Thread Dmitry Osipenko
The video decoder found on the NVIDIA Tegra20 SoC supports a standard set
of video formats like H.264 / MPEG-4 / WMV / VC1. Currently the driver
supports decoding of CAVLC H.264 only.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 .../bindings/arm/tegra/nvidia,tegra20-vde.txt  |   43 +
 drivers/staging/Kconfig|2 +
 drivers/staging/Makefile   |1 +
 drivers/staging/tegra-vde/Kconfig  |6 +
 drivers/staging/tegra-vde/Makefile |1 +
 drivers/staging/tegra-vde/TODO |5 +
 drivers/staging/tegra-vde/uapi.h   |  101 ++
 drivers/staging/tegra-vde/vde.c| 1105 
 8 files changed, 1264 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt
 create mode 100644 drivers/staging/tegra-vde/Kconfig
 create mode 100644 drivers/staging/tegra-vde/Makefile
 create mode 100644 drivers/staging/tegra-vde/TODO
 create mode 100644 drivers/staging/tegra-vde/uapi.h
 create mode 100644 drivers/staging/tegra-vde/vde.c

diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt 
b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt
new file mode 100644
index ..bf746777c58f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt
@@ -0,0 +1,43 @@
+NVIDIA Tegra Video Decoder Engine
+
+Required properties:
+- compatible : "nvidia,tegra20-vde"
+- reg : Must contain 2 register ranges: registers and IRAM region that
+VDE uses for its internal needs and for passing some of decoding
+parameters.
+- reg-names : Must include the following entries:
+  - regs
+  - iram
+- interrupts : Must contain an entry for each entry in interrupt-names.
+- interrupt-names : Must include the following entries:
+  - ucq-error
+  - sync-token
+  - bsev
+  - bsea
+  - sxe
+- clocks : Must contain an entry for each entry in clock-names.
+  See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+  - vde
+- resets : Must contain an entry for each entry in reset-names.
+  See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+  - vde
+
+Example:
+   vde@6001a000 {
+   compatible = "nvidia,tegra20-vde";
+   reg = <0x6001a000 0x3D00    /* VDE registers */
+  0x40000400 0x3FC00>; /* IRAM region */
+   reg-names = "regs", "iram";
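+   interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, /* UCQ error interrupt */
+                <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, /* Sync token interrupt */
+                <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, /* BSE-V interrupt */
+                <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, /* BSE-A interrupt */
+                <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; /* SXE interrupt */
+   interrupt-names = "ucq-error", "sync-token", "bsev", "bsea", "sxe";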
+   clocks = <&tegra_car TEGRA20_CLK_VDE>;
+   clock-names = "vde";
+   resets = <&tegra_car 61>;
+   reset-names = "vde";
+   };
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 554683912cff..10c982811093 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -118,4 +118,6 @@ source "drivers/staging/vboxvideo/Kconfig"
 
 source "drivers/staging/pi433/Kconfig"
 
+source "drivers/staging/tegra-vde/Kconfig"
+
 endif # STAGING
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 8951c37d8d80..c5ef39767f22 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -49,3 +49,4 @@ obj-$(CONFIG_BCM2835_VCHIQ)   += vc04_services/
 obj-$(CONFIG_CRYPTO_DEV_CCREE) += ccree/
 obj-$(CONFIG_DRM_VBOXVIDEO)+= vboxvideo/
 obj-$(CONFIG_PI433)+= pi433/
+obj-$(CONFIG_TEGRA_VDE)+= tegra-vde/
diff --git a/drivers/staging/tegra-vde/Kconfig 
b/drivers/staging/tegra-vde/Kconfig
new file mode 100644
index ..730ee006de66
--- /dev/null
+++ b/drivers/staging/tegra-vde/Kconfig
@@ -0,0 +1,6 @@
+config TEGRA_VDE
+   tristate "NVIDIA Tegra Video Decoder Engine driver"
+   depends on ARCH_TEGRA_2x_SOC || COMPILE_TEST
+   help
+   Say Y here to enable support for the NVIDIA Tegra video decoder
+   driver.
diff --git a/drivers/staging/tegra-vde/Makefile 
b/drivers/staging/tegra-vde/Makefile
new file mode 100644
index ..e7c0df1174bf
--- /dev/null
+++ b/drivers/staging/tegra-vde/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_TEGRA_VDE)+= vde.o
diff --git a/drivers/staging/tegra-vde/TODO b/drivers/staging/tegra-vde/TODO
new file mode 100644
index ..e98bbc7b3c19
--- /dev/null
+++ b/drivers/staging/tegra-vde/TODO
@@ -0,0 +1,5 @@
+TODO:
+   - Figure out how generic V4L2 API could be utilized by this driver,
+   

[PATCH v2 2/2] ARM: dts: tegra20: Add video decoder node

2017-10-03 Thread Dmitry Osipenko
Add a device node for the video decoder engine found on Tegra20.

Signed-off-by: Dmitry Osipenko <dig...@gmail.com>
---
 arch/arm/boot/dts/tegra20.dtsi | 17 +
 1 file changed, 17 insertions(+)

diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi
index 7c85f97f72ea..1b5d54b6c0cb 100644
--- a/arch/arm/boot/dts/tegra20.dtsi
+++ b/arch/arm/boot/dts/tegra20.dtsi
@@ -249,6 +249,23 @@
*/
};
 
+   vde@6001a000 {
+   compatible = "nvidia,tegra20-vde";
+   reg = <0x6001a000 0x3D00    /* VDE registers */
+  0x40000400 0x3FC00>; /* IRAM region */
+   reg-names = "regs", "iram";
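+   interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, /* UCQ error interrupt */
+                <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, /* Sync token interrupt */
+                <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, /* BSE-V interrupt */
+                <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, /* BSE-A interrupt */
+                <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; /* SXE interrupt */
+   interrupt-names = "ucq-error", "sync-token", "bsev", "bsea", "sxe";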
+   clocks = <&tegra_car TEGRA20_CLK_VDE>;
+   clock-names = "vde";
+   resets = <&tegra_car 61>;
+   reset-names = "vde";
+   };
+
apbmisc@7800 {
compatible = "nvidia,tegra20-apbmisc";
reg = <0x7800 0x64   /* Chip revision */
-- 
2.14.1



[PATCH v2 0/2] NVIDIA Tegra20 video decoder driver

2017-10-03 Thread Dmitry Osipenko
This driver provides accelerated video decoding on NVIDIA Tegra20 SoCs; it
is the result of reverse-engineering efforts. The driver has been tested on
the Toshiba AC100 and Acer A500 and should work on any Tegra20 device.

In userspace this driver is used by libvdpau-tegra [0], which implements
the VDPAU interface, so any video player that supports VDPAU can provide
accelerated video decoding on Tegra20 on Linux.

[0] https://github.com/grate-driver/libvdpau-tegra

Change log:
v2:
- Addressed v1 review comments from Stephen Warren and Dan Carpenter
- Implemented runtime PM
- Miscellaneous code cleanups
- Changed 'TODO'
- CC'd media maintainers for the review as per Greg K-H request,
  v1 can be viewed at https://lkml.org/lkml/2017/9/25/606

Dmitry Osipenko (2):
  staging: Introduce NVIDIA Tegra20 video decoder driver
  ARM: dts: tegra20: Add video decoder node

 .../bindings/arm/tegra/nvidia,tegra20-vde.txt  |   43 +
 arch/arm/boot/dts/tegra20.dtsi |   17 +
 drivers/staging/Kconfig|2 +
 drivers/staging/Makefile   |1 +
 drivers/staging/tegra-vde/Kconfig  |6 +
 drivers/staging/tegra-vde/Makefile |1 +
 drivers/staging/tegra-vde/TODO |5 +
 drivers/staging/tegra-vde/uapi.h   |  101 ++
 drivers/staging/tegra-vde/vde.c| 1105 
 9 files changed, 1281 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-vde.txt
 create mode 100644 drivers/staging/tegra-vde/Kconfig
 create mode 100644 drivers/staging/tegra-vde/Makefile
 create mode 100644 drivers/staging/tegra-vde/TODO
 create mode 100644 drivers/staging/tegra-vde/uapi.h
 create mode 100644 drivers/staging/tegra-vde/vde.c

-- 
2.14.1