[PATCHv2 0/7] vicodec improvements

2018-08-23 Thread Hans Verkuil
From: Hans Verkuil 

- add support for quantization parameters
- support many more pixel formats
- code simplifications
- rename source and use proper prefixes for the codec: this makes it
  independent from the vicodec driver and easier to reuse in userspace
  (similar to what we do for the v4l2-tpg code).
- split off the v4l2 'frontend' code for the FWHT codec into its own
  source for easier re-use elsewhere (i.e. v4l2-ctl/qvidcap).

I made a v4l-utils branch that uses the FWHT codec to compress video
when streaming over the network:

https://git.linuxtv.org/hverkuil/v4l-utils.git/log/?h=qvidcap

You need to add the --stream-to-host-lossy flag to enable FWHT streaming.

Note: the FWHT codec clips R/G/B values for RGB formats. This will be
addressed later. I might have to convert the R/G/B values from full to
limited range before encoding them, but I want to discuss this with the
author of the codec (Tom aan de Wiel) first.

Regards,

Hans

Changes since v1:

- added the last patch (split off v4l2 FWHT code)
- the GOP_SIZE and QP controls can now be set during streaming as
  well.

Hans Verkuil (7):
  vicodec: add QP controls
  vicodec: add support for more pixel formats
  vicodec: simplify flags handling
  vicodec: simplify blocktype checking
  vicodec: improve handling of uncompressable planes
  vicodec: rename and use proper fwht prefix for codec
  vicodec: split off v4l2 specific parts for the codec

 .../media/uapi/v4l/pixfmt-compressed.rst  |   2 +-
 drivers/media/platform/vicodec/Makefile   |   2 +-
 .../vicodec/{vicodec-codec.c => codec-fwht.c} | 148 --
 .../vicodec/{vicodec-codec.h => codec-fwht.h} |  80 ++-
 .../media/platform/vicodec/codec-v4l2-fwht.c  | 325 
 .../media/platform/vicodec/codec-v4l2-fwht.h  |  50 ++
 drivers/media/platform/vicodec/vicodec-core.c | 483 --
 7 files changed, 723 insertions(+), 367 deletions(-)
 rename drivers/media/platform/vicodec/{vicodec-codec.c => codec-fwht.c} (85%)
 rename drivers/media/platform/vicodec/{vicodec-codec.h => codec-fwht.h} (64%)
 create mode 100644 drivers/media/platform/vicodec/codec-v4l2-fwht.c
 create mode 100644 drivers/media/platform/vicodec/codec-v4l2-fwht.h

-- 
2.18.0



[PATCHv2 2/7] vicodec: add support for more pixel formats

2018-08-23 Thread Hans Verkuil
From: Hans Verkuil 

Add support for 4:2:2, 4:4:4 and RGB 24/32 bits formats.

This makes it a lot more useful, esp. as a simple video compression
codec for use with v4l2-ctl/qvidcap.

Note that it does not do any conversion between e.g. 4:2:2 and 4:2:0
or RGB and YUV: it still just compresses planes be they Y/U/V or R/G/B.

Signed-off-by: Hans Verkuil 
---
 .../media/platform/vicodec/vicodec-codec.c|  62 ++-
 .../media/platform/vicodec/vicodec-codec.h|   5 +
 drivers/media/platform/vicodec/vicodec-core.c | 357 +-
 3 files changed, 324 insertions(+), 100 deletions(-)

diff --git a/drivers/media/platform/vicodec/vicodec-codec.c 
b/drivers/media/platform/vicodec/vicodec-codec.c
index 7163f11b7ee8..7bd11a974db0 100644
--- a/drivers/media/platform/vicodec/vicodec-codec.c
+++ b/drivers/media/platform/vicodec/vicodec-codec.c
@@ -229,7 +229,8 @@ static void fwht(const u8 *block, s16 *output_block, 
unsigned int stride,
stride *= input_step;
 
for (i = 0; i < 8; i++, tmp += stride, out += 8) {
-   if (input_step == 1) {
+   switch (input_step) {
+   case 1:
workspace1[0]  = tmp[0] + tmp[1] - add;
workspace1[1]  = tmp[0] - tmp[1];
 
@@ -241,7 +242,8 @@ static void fwht(const u8 *block, s16 *output_block, 
unsigned int stride,
 
workspace1[6]  = tmp[6] + tmp[7] - add;
workspace1[7]  = tmp[6] - tmp[7];
-   } else {
+   break;
+   case 2:
workspace1[0]  = tmp[0] + tmp[2] - add;
workspace1[1]  = tmp[0] - tmp[2];
 
@@ -253,6 +255,33 @@ static void fwht(const u8 *block, s16 *output_block, 
unsigned int stride,
 
workspace1[6]  = tmp[12] + tmp[14] - add;
workspace1[7]  = tmp[12] - tmp[14];
+   break;
+   case 3:
+   workspace1[0]  = tmp[0] + tmp[3] - add;
+   workspace1[1]  = tmp[0] - tmp[3];
+
+   workspace1[2]  = tmp[6] + tmp[9] - add;
+   workspace1[3]  = tmp[6] - tmp[9];
+
+   workspace1[4]  = tmp[12] + tmp[15] - add;
+   workspace1[5]  = tmp[12] - tmp[15];
+
+   workspace1[6]  = tmp[18] + tmp[21] - add;
+   workspace1[7]  = tmp[18] - tmp[21];
+   break;
+   default:
+   workspace1[0]  = tmp[0] + tmp[4] - add;
+   workspace1[1]  = tmp[0] - tmp[4];
+
+   workspace1[2]  = tmp[8] + tmp[12] - add;
+   workspace1[3]  = tmp[8] - tmp[12];
+
+   workspace1[4]  = tmp[16] + tmp[20] - add;
+   workspace1[5]  = tmp[16] - tmp[20];
+
+   workspace1[6]  = tmp[24] + tmp[28] - add;
+   workspace1[7]  = tmp[24] - tmp[28];
+   break;
}
 
/* stage 2 */
@@ -704,25 +733,28 @@ u32 encode_frame(struct raw_frame *frm, struct raw_frame 
*ref_frm,
__be16 *rlco = cf->rlc_data;
__be16 *rlco_max;
u32 encoding;
+   u32 chroma_h = frm->height / frm->height_div;
+   u32 chroma_w = frm->width / frm->width_div;
+   unsigned int chroma_size = chroma_h * chroma_w;
 
rlco_max = rlco + size / 2 - 256;
encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
- frm->height, frm->width,
- 1, is_intra, next_is_intra);
+   frm->height, frm->width,
+   frm->luma_step, is_intra, next_is_intra);
if (encoding & FRAME_UNENCODED)
encoding |= LUMA_UNENCODED;
encoding &= ~FRAME_UNENCODED;
-   rlco_max = rlco + size / 8 - 256;
+   rlco_max = rlco + chroma_size / 2 - 256;
encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, cf,
-  frm->height / 2, frm->width / 2,
-  frm->chroma_step, is_intra, next_is_intra);
+chroma_h, chroma_w,
+frm->chroma_step, is_intra, next_is_intra);
if (encoding & FRAME_UNENCODED)
encoding |= CB_UNENCODED;
encoding &= ~FRAME_UNENCODED;
-   rlco_max = rlco + size / 8 - 256;
+   rlco_max = rlco + chroma_size / 2 - 256;
encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, cf,
-  frm->height / 2, frm->width / 2,
-  frm->chroma_step, is_intra, next_is_intra);
+chroma_h, chroma_w,
+frm->chroma_step, is_intra, next_is_intra);
if (encoding & FRAME_UNENCODED)
 

[PATCHv2 3/7] vicodec: simplify flags handling

2018-08-23 Thread Hans Verkuil
From: Hans Verkuil 

The flags field can be removed from struct vicodec_q_data.
This simplifies the flags handling elsewhere.

Signed-off-by: Hans Verkuil 
---
 drivers/media/platform/vicodec/vicodec-core.c | 26 +--
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/media/platform/vicodec/vicodec-core.c 
b/drivers/media/platform/vicodec/vicodec-core.c
index 6253fd9e1d30..4680b3c9b9b2 100644
--- a/drivers/media/platform/vicodec/vicodec-core.c
+++ b/drivers/media/platform/vicodec/vicodec-core.c
@@ -101,7 +101,6 @@ static struct platform_device vicodec_pdev = {
 struct vicodec_q_data {
unsigned intwidth;
unsigned intheight;
-   unsigned intflags;
unsigned intsizeimage;
unsigned intsequence;
const struct pixfmt_info *info;
@@ -188,7 +187,7 @@ static struct vicodec_q_data *get_q_data(struct vicodec_ctx 
*ctx,
 
 static void encode(struct vicodec_ctx *ctx,
   struct vicodec_q_data *q_data,
-  u8 *p_in, u8 *p_out)
+  u8 *p_in, u8 *p_out, u32 flags)
 {
unsigned int size = q_data->width * q_data->height;
const struct pixfmt_info *info = q_data->info;
@@ -293,17 +292,17 @@ static void encode(struct vicodec_ctx *ctx,
p_hdr->version = htonl(VICODEC_VERSION);
p_hdr->width = htonl(cf.width);
p_hdr->height = htonl(cf.height);
-   p_hdr->flags = htonl(q_data->flags);
if (encoding & LUMA_UNENCODED)
-   p_hdr->flags |= htonl(VICODEC_FL_LUMA_IS_UNCOMPRESSED);
+   flags |= VICODEC_FL_LUMA_IS_UNCOMPRESSED;
if (encoding & CB_UNENCODED)
-   p_hdr->flags |= htonl(VICODEC_FL_CB_IS_UNCOMPRESSED);
+   flags |= VICODEC_FL_CB_IS_UNCOMPRESSED;
if (encoding & CR_UNENCODED)
-   p_hdr->flags |= htonl(VICODEC_FL_CR_IS_UNCOMPRESSED);
+   flags |= VICODEC_FL_CR_IS_UNCOMPRESSED;
if (rf.height_div == 1)
-   p_hdr->flags |= htonl(VICODEC_FL_CHROMA_FULL_HEIGHT);
+   flags |= VICODEC_FL_CHROMA_FULL_HEIGHT;
if (rf.width_div == 1)
-   p_hdr->flags |= htonl(VICODEC_FL_CHROMA_FULL_WIDTH);
+   flags |= VICODEC_FL_CHROMA_FULL_WIDTH;
+   p_hdr->flags = htonl(flags);
p_hdr->colorspace = htonl(ctx->colorspace);
p_hdr->xfer_func = htonl(ctx->xfer_func);
p_hdr->ycbcr_enc = htonl(ctx->ycbcr_enc);
@@ -320,6 +319,7 @@ static int decode(struct vicodec_ctx *ctx,
unsigned int size = q_data->width * q_data->height;
unsigned int chroma_size = size;
unsigned int i;
+   u32 flags;
struct cframe_hdr *p_hdr;
struct cframe cf;
u8 *p;
@@ -327,7 +327,7 @@ static int decode(struct vicodec_ctx *ctx,
p_hdr = (struct cframe_hdr *)p_in;
cf.width = ntohl(p_hdr->width);
cf.height = ntohl(p_hdr->height);
-   q_data->flags = ntohl(p_hdr->flags);
+   flags = ntohl(p_hdr->flags);
ctx->colorspace = ntohl(p_hdr->colorspace);
ctx->xfer_func = ntohl(p_hdr->xfer_func);
ctx->ycbcr_enc = ntohl(p_hdr->ycbcr_enc);
@@ -348,12 +348,12 @@ static int decode(struct vicodec_ctx *ctx,
if (cf.width != q_data->width || cf.height != q_data->height)
return -EINVAL;
 
-   if (!(q_data->flags & VICODEC_FL_CHROMA_FULL_WIDTH))
+   if (!(flags & VICODEC_FL_CHROMA_FULL_WIDTH))
chroma_size /= 2;
-   if (!(q_data->flags & VICODEC_FL_CHROMA_FULL_HEIGHT))
+   if (!(flags & VICODEC_FL_CHROMA_FULL_HEIGHT))
chroma_size /= 2;
 
-   decode_frame(&cf, &ctx->ref_frame, q_data->flags);
+   decode_frame(&cf, &ctx->ref_frame, flags);
 
switch (q_data->info->id) {
case V4L2_PIX_FMT_YUV420:
@@ -486,7 +486,7 @@ static int device_process(struct vicodec_ctx *ctx,
if (ctx->is_enc) {
struct cframe_hdr *p_hdr = (struct cframe_hdr *)p_out;
 
-   encode(ctx, q_out, p_in, p_out);
+   encode(ctx, q_out, p_in, p_out, 0);
vb2_set_plane_payload(&out_vb->vb2_buf, 0,
  sizeof(*p_hdr) + ntohl(p_hdr->size));
} else {
-- 
2.18.0



[PATCHv2 7/7] vicodec: split off v4l2 specific parts for the codec

2018-08-23 Thread Hans Verkuil
From: Hans Verkuil 

Split off the decode and encode functions into a separate
source that can be reused elsewhere.

Signed-off-by: Hans Verkuil 
---
 drivers/media/platform/vicodec/Makefile   |   2 +-
 .../media/platform/vicodec/codec-v4l2-fwht.c  | 325 
 .../media/platform/vicodec/codec-v4l2-fwht.h  |  50 ++
 drivers/media/platform/vicodec/vicodec-core.c | 463 --
 4 files changed, 458 insertions(+), 382 deletions(-)
 create mode 100644 drivers/media/platform/vicodec/codec-v4l2-fwht.c
 create mode 100644 drivers/media/platform/vicodec/codec-v4l2-fwht.h

diff --git a/drivers/media/platform/vicodec/Makefile 
b/drivers/media/platform/vicodec/Makefile
index a27242ff14ad..01bf7e9308a6 100644
--- a/drivers/media/platform/vicodec/Makefile
+++ b/drivers/media/platform/vicodec/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-vicodec-objs := vicodec-core.o codec-fwht.o
+vicodec-objs := vicodec-core.o codec-fwht.o codec-v4l2-fwht.o
 
 obj-$(CONFIG_VIDEO_VICODEC) += vicodec.o
diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.c 
b/drivers/media/platform/vicodec/codec-v4l2-fwht.c
new file mode 100644
index ..cfcf84b8574d
--- /dev/null
+++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A V4L2 frontend for the FWHT codec
+ *
+ * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights 
reserved.
+ */
+
+#include 
+#include 
+#include 
+#include "codec-v4l2-fwht.h"
+
+static const struct v4l2_fwht_pixfmt_info v4l2_fwht_pixfmts[] = {
+   { V4L2_PIX_FMT_YUV420,  1, 3, 2, 1, 1, 2, 2 },
+   { V4L2_PIX_FMT_YVU420,  1, 3, 2, 1, 1, 2, 2 },
+   { V4L2_PIX_FMT_YUV422P, 1, 2, 1, 1, 1, 2, 1 },
+   { V4L2_PIX_FMT_NV12,1, 3, 2, 1, 2, 2, 2 },
+   { V4L2_PIX_FMT_NV21,1, 3, 2, 1, 2, 2, 2 },
+   { V4L2_PIX_FMT_NV16,1, 2, 1, 1, 2, 2, 1 },
+   { V4L2_PIX_FMT_NV61,1, 2, 1, 1, 2, 2, 1 },
+   { V4L2_PIX_FMT_NV24,1, 3, 1, 1, 2, 1, 1 },
+   { V4L2_PIX_FMT_NV42,1, 3, 1, 1, 2, 1, 1 },
+   { V4L2_PIX_FMT_YUYV,2, 2, 1, 2, 4, 2, 1 },
+   { V4L2_PIX_FMT_YVYU,2, 2, 1, 2, 4, 2, 1 },
+   { V4L2_PIX_FMT_UYVY,2, 2, 1, 2, 4, 2, 1 },
+   { V4L2_PIX_FMT_VYUY,2, 2, 1, 2, 4, 2, 1 },
+   { V4L2_PIX_FMT_BGR24,   3, 3, 1, 3, 3, 1, 1 },
+   { V4L2_PIX_FMT_RGB24,   3, 3, 1, 3, 3, 1, 1 },
+   { V4L2_PIX_FMT_HSV24,   3, 3, 1, 3, 3, 1, 1 },
+   { V4L2_PIX_FMT_BGR32,   4, 4, 1, 4, 4, 1, 1 },
+   { V4L2_PIX_FMT_XBGR32,  4, 4, 1, 4, 4, 1, 1 },
+   { V4L2_PIX_FMT_RGB32,   4, 4, 1, 4, 4, 1, 1 },
+   { V4L2_PIX_FMT_XRGB32,  4, 4, 1, 4, 4, 1, 1 },
+   { V4L2_PIX_FMT_HSV32,   4, 4, 1, 4, 4, 1, 1 },
+};
+
+const struct v4l2_fwht_pixfmt_info *v4l2_fwht_find_pixfmt(u32 pixelformat)
+{
+   unsigned int i;
+
+   for (i = 0; i < ARRAY_SIZE(v4l2_fwht_pixfmts); i++)
+   if (v4l2_fwht_pixfmts[i].id == pixelformat)
+   return v4l2_fwht_pixfmts + i;
+   return NULL;
+}
+
+const struct v4l2_fwht_pixfmt_info *v4l2_fwht_get_pixfmt(u32 idx)
+{
+   if (idx >= ARRAY_SIZE(v4l2_fwht_pixfmts))
+   return NULL;
+   return v4l2_fwht_pixfmts + idx;
+}
+
+unsigned int v4l2_fwht_encode(struct v4l2_fwht_state *state,
+ u8 *p_in, u8 *p_out)
+{
+   unsigned int size = state->width * state->height;
+   const struct v4l2_fwht_pixfmt_info *info = state->info;
+   struct fwht_cframe_hdr *p_hdr;
+   struct fwht_cframe cf;
+   struct fwht_raw_frame rf;
+   u32 encoding;
+   u32 flags = 0;
+
+   rf.width = state->width;
+   rf.height = state->height;
+   rf.luma = p_in;
+   rf.width_div = info->width_div;
+   rf.height_div = info->height_div;
+   rf.luma_step = info->luma_step;
+   rf.chroma_step = info->chroma_step;
+
+   switch (info->id) {
+   case V4L2_PIX_FMT_YUV420:
+   rf.cb = rf.luma + size;
+   rf.cr = rf.cb + size / 4;
+   break;
+   case V4L2_PIX_FMT_YVU420:
+   rf.cr = rf.luma + size;
+   rf.cb = rf.cr + size / 4;
+   break;
+   case V4L2_PIX_FMT_YUV422P:
+   rf.cb = rf.luma + size;
+   rf.cr = rf.cb + size / 2;
+   break;
+   case V4L2_PIX_FMT_NV12:
+   case V4L2_PIX_FMT_NV16:
+   case V4L2_PIX_FMT_NV24:
+   rf.cb = rf.luma + size;
+   rf.cr = rf.cb + 1;
+   break;
+   case V4L2_PIX_FMT_NV21:
+   case V4L2_PIX_FMT_NV61:
+   case V4L2_PIX_FMT_NV42:
+   rf.cr = rf.luma + size;
+   rf.cb = rf.cr + 1;
+   break;
+   case V4L2_PIX_FMT_YUYV:
+   rf.cb = rf.luma + 1;
+   rf.cr = rf.cb + 2;
+   break;
+   case V4L2_PIX_FMT_YVYU:
+   rf.cr = rf.luma + 1;
+   rf.cb = rf.cr + 2;
+   break;
+   case V4

[PATCHv2 5/7] vicodec: improve handling of uncompressable planes

2018-08-23 Thread Hans Verkuil
From: Hans Verkuil 

Exit the loop immediately once it is clear that the plane
cannot be compressed. Also clear the PCODED bit and fix the
PCODED check (it should check for the bit) in the caller code.

Signed-off-by: Hans Verkuil 
---
 drivers/media/platform/vicodec/vicodec-codec.c | 10 ++
 drivers/media/platform/vicodec/vicodec-core.c  |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/media/platform/vicodec/vicodec-codec.c 
b/drivers/media/platform/vicodec/vicodec-codec.c
index e402d988f2ad..3547129c1163 100644
--- a/drivers/media/platform/vicodec/vicodec-codec.c
+++ b/drivers/media/platform/vicodec/vicodec-codec.c
@@ -685,9 +685,6 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, 
__be16 *rlco_max,
input += 8 * input_step;
refp += 8 * 8;
 
-   if (encoding & FRAME_UNENCODED)
-   continue;
-
size = rlc(cf->coeffs, *rlco, blocktype);
if (last_size == size &&
!memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) 
{
@@ -702,12 +699,16 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 
**rlco, __be16 *rlco_max,
} else {
*rlco += size;
}
-   if (*rlco >= rlco_max)
+   if (*rlco >= rlco_max) {
encoding |= FRAME_UNENCODED;
+   goto exit_loop;
+   }
last_size = size;
}
input += width * 7 * input_step;
}
+
+exit_loop:
if (encoding & FRAME_UNENCODED) {
u8 *out = (u8 *)rlco_start;
 
@@ -721,6 +722,7 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, 
__be16 *rlco_max,
for (i = 0; i < height * width; i++, input += input_step)
*out++ = (*input == 0xff) ? 0xfe : *input;
*rlco = (__be16 *)out;
+   encoding &= ~FRAME_PCODED;
}
return encoding;
 }
diff --git a/drivers/media/platform/vicodec/vicodec-core.c 
b/drivers/media/platform/vicodec/vicodec-core.c
index 4680b3c9b9b2..caff521d94c6 100644
--- a/drivers/media/platform/vicodec/vicodec-core.c
+++ b/drivers/media/platform/vicodec/vicodec-core.c
@@ -281,7 +281,7 @@ static void encode(struct vicodec_ctx *ctx,
 
encoding = encode_frame(&rf, &ctx->ref_frame, &cf, !ctx->gop_cnt,
ctx->gop_cnt == ctx->gop_size - 1);
-   if (encoding != FRAME_PCODED)
+   if (!(encoding & FRAME_PCODED))
ctx->gop_cnt = 0;
if (++ctx->gop_cnt >= ctx->gop_size)
ctx->gop_cnt = 0;
-- 
2.18.0



[PATCHv2 1/7] vicodec: add QP controls

2018-08-23 Thread Hans Verkuil
From: Hans Verkuil 

Instead of hardcoding the quantization parameter (or 'DEADZONE_WIDTH'
as it was called in the codec) make this configurable through two
controls: one for I frames, one for P frames.

Also allow changing these parameters and the GOP_SIZE parameter while
streaming.

Signed-off-by: Hans Verkuil 
---
 .../media/platform/vicodec/vicodec-codec.c| 17 +++--
 .../media/platform/vicodec/vicodec-codec.h|  2 +
 drivers/media/platform/vicodec/vicodec-core.c | 66 ---
 3 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/drivers/media/platform/vicodec/vicodec-codec.c 
b/drivers/media/platform/vicodec/vicodec-codec.c
index 2d047646f614..7163f11b7ee8 100644
--- a/drivers/media/platform/vicodec/vicodec-codec.c
+++ b/drivers/media/platform/vicodec/vicodec-codec.c
@@ -13,7 +13,6 @@
 #include "vicodec-codec.h"
 
 #define ALL_ZEROS 15
-#define DEADZONE_WIDTH 20
 
 static const uint8_t zigzag[64] = {
0,
@@ -164,7 +163,7 @@ static const int quant_table_p[] = {
3, 3, 3, 6, 6, 9,  9,  10,
 };
 
-static void quantize_intra(s16 *coeff, s16 *de_coeff)
+static void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp)
 {
const int *quant = quant_table;
int i, j;
@@ -172,8 +171,7 @@ static void quantize_intra(s16 *coeff, s16 *de_coeff)
for (j = 0; j < 8; j++) {
for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
*coeff >>= *quant;
-   if (*coeff >= -DEADZONE_WIDTH &&
-   *coeff <= DEADZONE_WIDTH)
+   if (*coeff >= -qp && *coeff <= qp)
*coeff = *de_coeff = 0;
else
*de_coeff = *coeff << *quant;
@@ -191,7 +189,7 @@ static void dequantize_intra(s16 *coeff)
*coeff <<= *quant;
 }
 
-static void quantize_inter(s16 *coeff, s16 *de_coeff)
+static void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp)
 {
const int *quant = quant_table_p;
int i, j;
@@ -199,8 +197,7 @@ static void quantize_inter(s16 *coeff, s16 *de_coeff)
for (j = 0; j < 8; j++) {
for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
*coeff >>= *quant;
-   if (*coeff >= -DEADZONE_WIDTH &&
-   *coeff <= DEADZONE_WIDTH)
+   if (*coeff >= -qp && *coeff <= qp)
*coeff = *de_coeff = 0;
else
*de_coeff = *coeff << *quant;
@@ -639,13 +636,15 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 
**rlco, __be16 *rlco_max,
deltablock, width, input_step);
if (is_intra || blocktype == IBLOCK) {
fwht(input, cf->coeffs, width, input_step, 1);
-   quantize_intra(cf->coeffs, cf->de_coeffs);
+   quantize_intra(cf->coeffs, cf->de_coeffs,
+  cf->i_frame_qp);
blocktype = IBLOCK;
} else {
/* inter code */
encoding |= FRAME_PCODED;
fwht16(deltablock, cf->coeffs, 8, 0);
-   quantize_inter(cf->coeffs, cf->de_coeffs);
+   quantize_inter(cf->coeffs, cf->de_coeffs,
+  cf->p_frame_qp);
}
if (!next_is_intra) {
ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
diff --git a/drivers/media/platform/vicodec/vicodec-codec.h 
b/drivers/media/platform/vicodec/vicodec-codec.h
index cdfad1332a3e..cabe7b98623b 100644
--- a/drivers/media/platform/vicodec/vicodec-codec.h
+++ b/drivers/media/platform/vicodec/vicodec-codec.h
@@ -103,6 +103,8 @@ struct cframe_hdr {
 
 struct cframe {
unsigned int width, height;
+   u16 i_frame_qp;
+   u16 p_frame_qp;
__be16 *rlc_data;
s16 coeffs[8 * 8];
s16 de_coeffs[8 * 8];
diff --git a/drivers/media/platform/vicodec/vicodec-core.c 
b/drivers/media/platform/vicodec/vicodec-core.c
index 408cd55d3580..1f14e94e61b4 100644
--- a/drivers/media/platform/vicodec/vicodec-core.c
+++ b/drivers/media/platform/vicodec/vicodec-core.c
@@ -96,9 +96,10 @@ struct vicodec_ctx {
spinlock_t  *lock;
 
struct v4l2_ctrl_handler hdl;
-   struct v4l2_ctrl*ctrl_gop_size;
unsigned intgop_size;
unsigned intgop_cnt;
+   u16 i_frame_qp;
+   u16 p_frame_qp;
 
/* Abort requested by m2m */
int aborting;
@@ -191,13 +192,15 @@ static void encode(struct vicodec_ctx *ctx,
 
cf.width = 

[PATCHv2 6/7] vicodec: rename and use proper fwht prefix for codec

2018-08-23 Thread Hans Verkuil
From: Hans Verkuil 

The codec source is generic and not vicodec specific. It can be used
by other drivers or userspace as well. So rename the source and header
to something more generic (codec-fwht.c/h) and prefix the defines, types
and functions with fwht_.

Signed-off-by: Hans Verkuil 
---
 .../media/uapi/v4l/pixfmt-compressed.rst  |  2 +-
 drivers/media/platform/vicodec/Makefile   |  2 +-
 .../vicodec/{vicodec-codec.c => codec-fwht.c} | 62 -
 .../vicodec/{vicodec-codec.h => codec-fwht.h} | 77 +++-
 drivers/media/platform/vicodec/vicodec-core.c | 89 ++-
 5 files changed, 118 insertions(+), 114 deletions(-)
 rename drivers/media/platform/vicodec/{vicodec-codec.c => codec-fwht.c} (93%)
 rename drivers/media/platform/vicodec/{vicodec-codec.h => codec-fwht.h} (63%)

diff --git a/Documentation/media/uapi/v4l/pixfmt-compressed.rst 
b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
index d382e7a5c38e..d04b18adac33 100644
--- a/Documentation/media/uapi/v4l/pixfmt-compressed.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
@@ -101,4 +101,4 @@ Compressed Formats
   - 'FWHT'
   - Video elementary stream using a codec based on the Fast Walsh Hadamard
 Transform. This codec is implemented by the vicodec ('Virtual Codec')
-   driver. See the vicodec-codec.h header for more details.
+   driver. See the codec-fwht.h header for more details.
diff --git a/drivers/media/platform/vicodec/Makefile 
b/drivers/media/platform/vicodec/Makefile
index 197229428953..a27242ff14ad 100644
--- a/drivers/media/platform/vicodec/Makefile
+++ b/drivers/media/platform/vicodec/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-vicodec-objs := vicodec-core.o vicodec-codec.o
+vicodec-objs := vicodec-core.o codec-fwht.o
 
 obj-$(CONFIG_VIDEO_VICODEC) += vicodec.o
diff --git a/drivers/media/platform/vicodec/vicodec-codec.c 
b/drivers/media/platform/vicodec/codec-fwht.c
similarity index 93%
rename from drivers/media/platform/vicodec/vicodec-codec.c
rename to drivers/media/platform/vicodec/codec-fwht.c
index 3547129c1163..f91f90f7e5fc 100644
--- a/drivers/media/platform/vicodec/vicodec-codec.c
+++ b/drivers/media/platform/vicodec/codec-fwht.c
@@ -10,7 +10,18 @@
  */
 
 #include 
-#include "vicodec-codec.h"
+#include "codec-fwht.h"
+
+/*
+ * Note: bit 0 of the header must always be 0. Otherwise it cannot
+ * be guaranteed that the magic 8 byte sequence (see below) can
+ * never occur in the rlc output.
+ */
+#define PFRAME_BIT BIT(15)
+#define DUPS_MASK 0x1ffe
+
+#define PBLOCK 0
+#define IBLOCK 1
 
 #define ALL_ZEROS 15
 
@@ -642,7 +653,7 @@ static void add_deltas(s16 *deltas, const u8 *ref, int 
stride)
 }
 
 static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
-   struct cframe *cf, u32 height, u32 width,
+   struct fwht_cframe *cf, u32 height, u32 width,
unsigned int input_step,
bool is_intra, bool next_is_intra)
 {
@@ -669,7 +680,7 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, 
__be16 *rlco_max,
   cf->i_frame_qp);
} else {
/* inter code */
-   encoding |= FRAME_PCODED;
+   encoding |= FWHT_FRAME_PCODED;
fwht16(deltablock, cf->coeffs, 8, 0);
quantize_inter(cf->coeffs, cf->de_coeffs,
   cf->p_frame_qp);
@@ -700,7 +711,7 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, 
__be16 *rlco_max,
*rlco += size;
}
if (*rlco >= rlco_max) {
-   encoding |= FRAME_UNENCODED;
+   encoding |= FWHT_FRAME_UNENCODED;
goto exit_loop;
}
last_size = size;
@@ -709,7 +720,7 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, 
__be16 *rlco_max,
}
 
 exit_loop:
-   if (encoding & FRAME_UNENCODED) {
+   if (encoding & FWHT_FRAME_UNENCODED) {
u8 *out = (u8 *)rlco_start;
 
input = input_start;
@@ -722,13 +733,15 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 
**rlco, __be16 *rlco_max,
for (i = 0; i < height * width; i++, input += input_step)
*out++ = (*input == 0xff) ? 0xfe : *input;
*rlco = (__be16 *)out;
-   encoding &= ~FRAME_PCODED;
+   encoding &= ~FWHT_FRAME_PCODED;
}
return encoding;
 }
 
-u32 encode_frame(struct raw_frame *frm, struct raw_frame *ref_frm,
-struct cframe *cf, bool is_intra, bool next_is_intra)
+u32 fwht_encode_frame(struct fwht_raw_frame *frm,
+ s

[PATCHv2 4/7] vicodec: simplify blocktype checking

2018-08-23 Thread Hans Verkuil
From: Hans Verkuil 

Simplify some blocktype/is_intra checks.

Signed-off-by: Hans Verkuil 
---
 drivers/media/platform/vicodec/vicodec-codec.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/platform/vicodec/vicodec-codec.c 
b/drivers/media/platform/vicodec/vicodec-codec.c
index 7bd11a974db0..e402d988f2ad 100644
--- a/drivers/media/platform/vicodec/vicodec-codec.c
+++ b/drivers/media/platform/vicodec/vicodec-codec.c
@@ -663,11 +663,10 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 
**rlco, __be16 *rlco_max,
if (!is_intra)
blocktype = decide_blocktype(input, refp,
deltablock, width, input_step);
-   if (is_intra || blocktype == IBLOCK) {
+   if (blocktype == IBLOCK) {
fwht(input, cf->coeffs, width, input_step, 1);
quantize_intra(cf->coeffs, cf->de_coeffs,
   cf->i_frame_qp);
-   blocktype = IBLOCK;
} else {
/* inter code */
encoding |= FRAME_PCODED;
-- 
2.18.0



[PATCH] media: aptina-pll: allow approximating the requested pix_clock

2018-08-23 Thread Helmut Grohne
Clock frequencies are not exact values, but rather imprecise, physical
properties. The present pll computation however, treats them as exact.
It tries to compute parameters that attain the requested pix_clock
exactly. Failing that, it gives up.

The new implementation approximates the requested pix_clock and reports
the actual value resulting from the computed parameters. If the
requested pix_clock can be attained, the original behaviour of
maximizing p1 is retained.

Experiments with the algorithm in userspace on an arm device show that
the computational cost is negligible compared to executing a binary for
all practical inputs.

Signed-off-by: Helmut Grohne 
---
 drivers/media/i2c/aptina-pll.c | 263 -
 drivers/media/i2c/mt9m032.c|  15 ++-
 2 files changed, 165 insertions(+), 113 deletions(-)

I tried using aptina_pll_calculate in a driver for a similar image sensor.
Using it, I was unable to find a pix_clock value such that the computation was
successful. Most of the practical parameters result in a non-integer pix_clock,
something that struct pll cannot represent.

The driver is not ready for submission at this point, but the changes to
aptina_pll_calculate are reasonable on their own, which is why I submit them
separately now.

Helmut

diff --git a/drivers/media/i2c/aptina-pll.c b/drivers/media/i2c/aptina-pll.c
index 224ae4e4cf8b..249018772b2b 100644
--- a/drivers/media/i2c/aptina-pll.c
+++ b/drivers/media/i2c/aptina-pll.c
@@ -2,6 +2,7 @@
  * Aptina Sensor PLL Configuration
  *
  * Copyright (C) 2012 Laurent Pinchart 
+ * Copyright (C) 2018 Intenta GmbH
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -14,23 +15,84 @@
  */
 
 #include 
-#include 
 #include 
-#include 
+#include 
 #include 
 
 #include "aptina-pll.h"
 
+/* A variant of mult_frac that works even when (x % denom) * numer produces a
+ * 32bit overflow.
+ */
+#define mult_frac_exact(x, numer, denom) \
+   ((u32)div_u64(mul_u32_u32((x), (numer)), (denom)))
+
+static unsigned int absdiff(unsigned int x, unsigned int y)
+{
+   return x > y ? x - y : y - x;
+}
+
+/* Find n_min <= *np <= n_max and d_min <= *dp <= d_max such that *np / *dp
+ * approximates n_target / d_target.
+ */
+static int approximate_fraction(unsigned int n_min, unsigned int n_max,
+   unsigned int d_min, unsigned int d_max,
+   unsigned int n_target, unsigned int d_target,
+   unsigned int *np, unsigned int *dp)
+{
+   unsigned int d, best_err = UINT_MAX;
+
+   d_min = max(d_min, mult_frac_exact(n_min, d_target, n_target));
+   d_max = min(d_max, mult_frac_exact(n_max, d_target, n_target) + 1);
+   if (d_min > d_max)
+   return -EINVAL;
+
+   for (d = d_min; d < d_max; ++d) {
+   unsigned int n = mult_frac_exact(d, n_target, d_target);
+
+   if (n >= n_min) {
+   unsigned int err = absdiff(
+   n_target, mult_frac_exact(n, d_target, d));
+
+   if (err < best_err) {
+   best_err = err;
+   *np = n;
+   *dp = d;
+   }
+   if (err == 0)
+   return 0;
+   }
+   ++n;
+   if (n <= n_max) {
+   unsigned int err = absdiff(
+   n_target, mult_frac_exact(n, d_target, d));
+
+   if (err < best_err) {
+   best_err = err;
+   *np = n;
+   *dp = d;
+   }
+   }
+   }
+   return best_err == UINT_MAX ? -EINVAL : 0;
+}
+
+/* Find parameters n, m, p1 such that:
+ *   n_min <= n <= n_max
+ *   m_min <= m <= m_max
+ *   p1_min <= p1 <= p1_max, even
+ *   int_clock_min <= ext_clock / n <= int_clock_max
+ *   out_clock_min <= ext_clock / n * m <= out_clock_max
+ *   pix_clock = ext_clock / n * m / p1 (approximately)
+ *   p1 is maximized
+ * Report the achieved pix_clock (input/output parameter).
+ */
 int aptina_pll_calculate(struct device *dev,
 const struct aptina_pll_limits *limits,
 struct aptina_pll *pll)
 {
-   unsigned int mf_min;
-   unsigned int mf_max;
-   unsigned int p1_min;
-   unsigned int p1_max;
-   unsigned int p1;
-   unsigned int div;
+   unsigned int n_min, n_max, m_min, m_max, p1_min, p1_max, p1;
+   unsigned int clock_err = UINT_MAX;
 
dev_dbg(dev, "PLL: ext clock %u pix clock %u\n",
pll->ext_clock, pll->pix_clock);
@@ -46,120 +108,103 @@ int aptina_pll_calculate(struct device *dev,
return -EINVAL;
}
 
-   /* Compute the multiplie

Re: [RFC] Request API and V4L2 capabilities

2018-08-23 Thread Paul Kocialkowski
Hi,

On Wed, 2018-08-22 at 14:33 -0300, Ezequiel Garcia wrote:
> On Wed, 2018-08-22 at 16:10 +0200, Paul Kocialkowski wrote:
> > Hi,
> > 
> > On Tue, 2018-08-21 at 17:52 +0900, Tomasz Figa wrote:
> > > Hi Hans, Paul,
> > > 
> > > On Mon, Aug 6, 2018 at 6:29 PM Paul Kocialkowski
> > >  wrote:
> > > > 
> > > > On Mon, 2018-08-06 at 11:23 +0200, Hans Verkuil wrote:
> > > > > On 08/06/2018 11:13 AM, Paul Kocialkowski wrote:
> > > > > > Hi,
> > > > > > 
> > > > > > On Mon, 2018-08-06 at 10:32 +0200, Hans Verkuil wrote:
> > > > > > > On 08/06/2018 10:16 AM, Paul Kocialkowski wrote:
> > > > > > > > On Sat, 2018-08-04 at 15:50 +0200, Hans Verkuil wrote:
> > > > > > > > > Regarding point 3: I think this should be documented next to 
> > > > > > > > > the pixel format. I.e.
> > > > > > > > > the MPEG-2 Slice format used by the stateless cedrus codec 
> > > > > > > > > requires the request API
> > > > > > > > > and that two MPEG-2 controls (slice params and quantization 
> > > > > > > > > matrices) must be present
> > > > > > > > > in each request.
> > > > > > > > > 
> > > > > > > > > I am not sure a control flag (e.g. 
> > > > > > > > > V4L2_CTRL_FLAG_REQUIRED_IN_REQ) is needed here.
> > > > > > > > > It's really implied by the fact that you use a stateless 
> > > > > > > > > codec. It doesn't help
> > > > > > > > > generic applications like v4l2-ctl or qv4l2 either since in 
> > > > > > > > > order to support
> > > > > > > > > stateless codecs they will have to know about the details of 
> > > > > > > > > these controls anyway.
> > > > > > > > > 
> > > > > > > > > So I am inclined to say that it is not necessary to expose 
> > > > > > > > > this information in
> > > > > > > > > the API, but it has to be documented together with the pixel 
> > > > > > > > > format documentation.
> > > > > > > > 
> > > > > > > > I think this is affected by considerations about codec 
> > > > > > > > profile/level
> > > > > > > > support. More specifically, some controls will only be required 
> > > > > > > > for
> > > > > > > > supporting advanced codec profiles/levels, so they can only be
> > > > > > > > explicitly marked with appropriate flags by the driver when the 
> > > > > > > > target
> > > > > > > > profile/level is known. And I don't think it would be sane for 
> > > > > > > > userspace
> > > > > > > > to explicitly set what profile/level it's aiming at. As a 
> > > > > > > > result, I
> > > > > > > > don't think we can explicitly mark controls as required or 
> > > > > > > > optional.
> > > 
> > > I'm not sure this is entirely true. The hardware may need to be
> > > explicitly told what profile the video is. It may even not be the
> > > hardware, but the driver itself too, given that the profile may imply
> > > the CAPTURE pixel format, e.g. for VP9 profiles:
> > > 
> > > profile 0
> > > color depth: 8 bit/sample, chroma subsampling: 4:2:0
> > > profile 1
> > > color depth: 8 bit, chroma subsampling: 4:2:0, 4:2:2, 4:4:4
> > > profile 2
> > > color depth: 10–12 bit, chroma subsampling: 4:2:0
> > > profile 3
> > > color depth: 10–12 bit, chroma subsampling: 4:2:0, 4:2:2, 4:4:4
> > > 
> > > (reference: https://en.wikipedia.org/wiki/VP9#Profiles)
> > 
> > I think it would be fair to expect userspace to select the right
> > destination format (and maybe have the driver error if there's a
> > mismatch with the meta-data) instead of having the driver somewhat
> > expose what format should be used.
> > 
> > But maybe this would be an API violation, since all the enumerated
> > formats are probably supposed to be selectable?
> > 
> > We could also look at it the other way round and consider that selecting
> > an exposed format is always legit, but that it implies passing a
> > bitstream that matches it or the driver will error (because of an
> > invalid bitstream passed, not because of a "wrong" selected format).
> > 
> 
> The API requires the user to negotiate via TRY_FMT/S_FMT. The driver
> usually does not return error on invalid formats, and simply return
> a format it can work with. I think the kernel-user contract has to
> guarantee if the driver accepted a given format, it won't fail to
> encoder or decode.

Well, the issue here is that in order to correctly enumerate the
formats, the driver needs to be aware of:
1. in what destination format the bitstream data is decoded to;
2. what format convesions the VPU can do.

Step 1 is known by userspace but is only passed to the driver with the
bitstream metadata from controls. This is much too late for trimming the
list of supported formats.

I'm not sure that passing an extra information to the driver early to
trim the list would make sense, because it comes to down to telling the
driver what format to target and then asking the driver was format it
supports, which is rather redundant.

I think the information we need to expose to userspace is whether the
driver supports a destination format that does not match the bitstream
format. We could make it part of the spec that, by lack 

Re: [PATCHv2 0/7] vicodec improvements

2018-08-23 Thread Hans Verkuil
On 08/23/2018 09:32 AM, Hans Verkuil wrote:
> From: Hans Verkuil 
> 
> - add support for quantization parameters
> - support many more pixel formats
> - code simplifications
> - rename source and use proper prefixes for the codec: this makes it
>   independent from the vicodec driver and easier to reuse in userspace
>   (similar to what we do for the v4l2-tpg code).
> - split off the v4l2 'frontend' code for the FWHT codec into its own
>   source for easier re-use elsewhere (i.e. v4l2-ctl/qvidcap).
> 
> I made a v4l-utils branch that uses the FWHT codec to compress video
> when streaming over the network:
> 
> https://git.linuxtv.org/hverkuil/v4l-utils.git/log/?h=qvidcap
> 
> You need to add the --stream-to-host-lossy flag to enable FWHT streaming.
> 
> Note: the FWHT codec clips R/G/B values for RGB formats. This will be
> addressed later. I might have to convert the R/G/B values from full to
> limited range before encoding them, but I want to discuss this with the
> author of the codec (Tom aan de Wiel) first.

Figured out where the problem is and posted a patch 8/7 to fix this.

Since I no longer have these artifacts I also changed the --stream-to-host-lossy
argument to --stream-lossless: by default v4l2-ctl --stream-to-host will now
use the FWHT codec (if supported for the given pixelformat), unless 
--stream-lossless
is given.

Still not 100% certain about this, so this might change in the future.

Regards,

Hans

> 
> Regards,
> 
>   Hans
> 
> Changes since v1:
> 
> - added the last patch (split off v4l2 FWHT code)
> - the GOP_SIZE and QP controls can now be set during streaming as
>   well.
> 
> Hans Verkuil (7):
>   vicodec: add QP controls
>   vicodec: add support for more pixel formats
>   vicodec: simplify flags handling
>   vicodec: simplify blocktype checking
>   vicodec: improve handling of uncompressable planes
>   vicodec: rename and use proper fwht prefix for codec
>   vicodec: split off v4l2 specific parts for the codec
> 
>  .../media/uapi/v4l/pixfmt-compressed.rst  |   2 +-
>  drivers/media/platform/vicodec/Makefile   |   2 +-
>  .../vicodec/{vicodec-codec.c => codec-fwht.c} | 148 --
>  .../vicodec/{vicodec-codec.h => codec-fwht.h} |  80 ++-
>  .../media/platform/vicodec/codec-v4l2-fwht.c  | 325 
>  .../media/platform/vicodec/codec-v4l2-fwht.h  |  50 ++
>  drivers/media/platform/vicodec/vicodec-core.c | 483 --
>  7 files changed, 723 insertions(+), 367 deletions(-)
>  rename drivers/media/platform/vicodec/{vicodec-codec.c => codec-fwht.c} (85%)
>  rename drivers/media/platform/vicodec/{vicodec-codec.h => codec-fwht.h} (64%)
>  create mode 100644 drivers/media/platform/vicodec/codec-v4l2-fwht.c
>  create mode 100644 drivers/media/platform/vicodec/codec-v4l2-fwht.h
> 



[PATCHv2 8/7] codec-fwht: fix out-of-range values when decoding

2018-08-23 Thread Hans Verkuil
While decoding you need to make sure you do not get values < 0
or > 255. Note that since this code will also be used in userspace
utilities the clamp macro isn't used since that is kernel-only.

Signed-off-by: Hans Verkuil 
---
 drivers/media/platform/vicodec/codec-fwht.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/vicodec/codec-fwht.c 
b/drivers/media/platform/vicodec/codec-fwht.c
index f91f90f7e5fc..47939160560e 100644
--- a/drivers/media/platform/vicodec/codec-fwht.c
+++ b/drivers/media/platform/vicodec/codec-fwht.c
@@ -625,8 +625,14 @@ static void fill_decoder_block(u8 *dst, const s16 *input, 
int stride)
int i, j;

for (i = 0; i < 8; i++) {
-   for (j = 0; j < 8; j++)
-   *dst++ = *input++;
+   for (j = 0; j < 8; j++, input++, dst++) {
+   if (*input < 0)
+   *dst = 0;
+   else if (*input > 255)
+   *dst = 255;
+   else
+   *dst = *input;
+   }
dst += stride - 8;
}
 }
-- 
2.18.0




Re: [RFC] Request API questions

2018-08-23 Thread Hans Verkuil
On 08/20/18 09:17, Tomasz Figa wrote:
> Hi Hans,
> 
> On Fri, Aug 17, 2018 at 7:09 PM Hans Verkuil  wrote:
>>
>> On 17/08/18 12:02, Tomasz Figa wrote:
>>> On Thu, Aug 16, 2018 at 8:15 PM Mauro Carvalho Chehab
>>>  wrote:

 Em Thu, 16 Aug 2018 12:25:25 +0200
 Hans Verkuil  escreveu:

> [snip]
> 3) Calling VIDIOC_G_EXT_CTRLS for a request that has not been queued yet 
> will
>return either the value of the control you set earlier in the request, 
> or
>the current HW control value if it was never set in the request.
>
>I believe it makes sense to return what was set in the request 
> previously
>(if you set it, you should be able to get it), but it is an idea to 
> return
>ENOENT when calling this for controls that are NOT in the request.
>
>I'm inclined to implement that. Opinions?

 Return the request "cached" value, IMO, doesn't make sense. If the
 application needs such cache, it can implement itself.
>>>
>>> Can we think about any specific use cases for a user space that first
>>> sets a control value to a request and then needs to ask the kernel to
>>> get the value back? After all, it was the user space which set the
>>> value, so I'm not sure if there is any need for the kernel to be an
>>> intermediary here.
>>>

 Return an error code if the request has not yet completed makes
 sense. Not sure what would be the best error code here... if the
 request is queued already (but not processed), EBUSY seems to be the
 better choice, but, if it was not queued yet, I'm not sure. I guess
 ENOENT would work.
>>>
>>> IMHO, as far as we assign unique error codes for different conditions
>>> and document them well, we should be okay with any not absurdly
>>> mismatched code. After all, most of those codes are defined for file
>>> system operations and don't really map directly to anything else.
>>>
>>> FYI, VIDIOC_G_(EXT_)CTRL returns EINVAL if an unsupported control is
>>> queried, so if we decided to keep the "cache" functionality after all,
>>> perhaps we should stay consistent with it?
>>> Reference: 
>>> https://www.kernel.org/doc/html/latest/media/uapi/v4l/vidioc-g-ext-ctrls.html#return-value
>>>
>>> My suggestion would be:
>>>  - EINVAL: the control was not in the request, (if we keep the cache
>>> functionality)
>>>  - EPERM: the value is not ready, (we selected this code for Decoder
>>> Interface to mean that CAPTURE format is not ready, which is similar;
>>> perhaps that could be consistent?)
>>>
>>> Note that EINVAL would only apply to writable controls, while EPERM
>>> only to volatile controls, since the latter can only change due to
>>> request completion (non-volatile controls can only change as an effect
>>> of user space action).
>>>
>>
>> I'm inclined to just always return EPERM when calling G_EXT_CTRLS for
>> a request. We can always relax this in the future.
>>
>> So when a request is not yet queued G_EXT_CTRLS returns EPERM, when
>> queued but not completed it returns EBUSY and once completed it will
>> work as it does today.
> 
> Not sure I'm following. What do we differentiate between with EPERM
> and EBUSY? In both cases the value is not available yet and for codecs
> we decided to have that signaled by EPERM.

EBUSY is only returned when you attempt to set a control that temporarily
cannot be written (usually because you are streaming and changing the
control value would break something).

Getting a control from a request that is not completed is in this proposal
just not permitted (at least for now, this might be relaxed in the future).

Thinking some more about this I believe that the correct error code to
return here is EACCES. This is currently returned if you try to get a
write-only control. Controls in a request are basically write-only
controls until the request completes, so this makes sense to me.


> On top of that, I still think we should be able to tell the case of
> querying for a control that can never show up in the request, even
> after it completes, specifically for any non-volatile control. That
> could be done by returning a different code and -EINVAL would match
> the control API behavior without requests.

Why can't you get non-volatile controls? I fail to see why volatile
or non-volatile makes any difference. It is perfectly fine to get
a non-volatile control from a completed request. I.e. if you set a
control like GAIN in a request, then you want to read back the
gain value used when the request was applied. There is no guarantee
that the driver didn't change the requested gain when it actually
applied it.

Regards,

Hans

> 
> The general logic would be like the pseudo code below:
> 
> G_EXT_CTRLS()
> {
> if ( control is not volatile )
> return -EINVAL;
> 
> if ( request is not complete )
> return -EPERM;
> 
> return control from the request;
> }
> 
> Best regards,
> Tomasz
> 



Re: [RFC] Request API questions

2018-08-23 Thread Hans Verkuil
Hi all,

Based on the discussion on the mailinglist I came to the following conclusions
which I will be implementing on top of the reqv18 patches:

On 08/16/18 12:25, Hans Verkuil wrote:
> Laurent raised a few API issues/questions in his review of the documentation.
> 
> I've consolidated those in this RFC. I would like to know what others think
> and if I should make changes.
> 
> 1) Should you be allowed to set controls directly if they are also used in
>requests? Right now this is allowed, although we warn in the spec that
>this can lead to undefined behavior.
> 
>In my experience being able to do this is very useful while testing,
>and restricting this is not all that easy to implement. I also think it is
>not our job. It is not as if something will break when you do this.
> 
>If there really is a good reason why you can't mix this for a specific
>control, then the driver can check this and return -EBUSY.

We allow this. The warning in the spec is sufficient and there are legitimate
use-cases where you want to be able to do this.

> 
> 2) If request_fd in QBUF or the control ioctls is not a request fd, then we
>now return ENOENT. Laurent suggests using EBADR ('Invalid request 
> descriptor')
>instead. This seems like a good idea to me. Should I change this?

Mauro wasn't too keen on it and EBADR appears to be arch dependent (different
values for different architectures).

I think using EBADR is a bit too risky.

However, I do agree with Laurent that ENOENT isn't the best error code. We have 
a
similar situation with vb2 and dmabuf if the fd for a dmabuf is invalid. In that
case vb2 returns -EINVAL, but it also issues a dprintk in that case so it is a
bit easier to discover the reason for EINVAL. I'll do the same.

> 
> 3) Calling VIDIOC_G_EXT_CTRLS for a request that has not been queued yet will
>return either the value of the control you set earlier in the request, or
>the current HW control value if it was never set in the request.
> 
>I believe it makes sense to return what was set in the request previously
>(if you set it, you should be able to get it), but it is an idea to return
>ENOENT when calling this for controls that are NOT in the request.
> 
>I'm inclined to implement that. Opinions?

The consensus is to prohibit this, at least for now. So attempts to get controls
from a request that is not in completed state will return -EACCES.

> 
> 4) When queueing a buffer to a request with VIDIOC_QBUF you set 
> V4L2_BUF_FLAG_REQUEST_FD
>to indicate a valid request_fd. For other queue ioctls that take a struct 
> v4l2_buffer
>this flag and the request_fd field are just ignored. Should we return 
> EINVAL
>instead if the flag is set for those ioctls?
> 
>The argument for just ignoring it is that older kernels that do not know 
> about
>this flag will ignore it as well. There is no check against unknown flags.

We'll just leave this as-is. I'll add a note to the spec that userspace should 
clear this
flag for ioctls other than QBUF.

Regards,

Hans


Re: [PATCH] media: aptina-pll: allow approximating the requested pix_clock

2018-08-23 Thread Laurent Pinchart
Hi Helmut,

Thank you for the patch.

On Thursday, 23 August 2018 10:52:09 EEST Helmut Grohne wrote:
> Clock frequencies are not exact values, but rather imprecise, physical
> properties. The present pll computation however, treats them as exact.
> It tries to compute parameters that attain the requested pix_clock
> exactly. Failing that, it gives up.
> 
> The new implementation approximates the requested pix_clock and reports
> the actual value resulting from the computed parameters. If the
> requested pix_clock can be attained, the original behaviour of
> maximizing p1 is retained.
> 
> Experiments with the algorithm in userspace on an arm device show that
> the computational cost is negligible compared to executing a binary for
> all practical inputs.

Could you please share numbers, ideally when run in kernel space ?

> Signed-off-by: Helmut Grohne 
> ---
>  drivers/media/i2c/aptina-pll.c | 263 ++---
>  drivers/media/i2c/mt9m032.c|  15 ++-
>  2 files changed, 165 insertions(+), 113 deletions(-)
> 
> I tried using aptina_pll_calculate in a driver for a similar image sensor.
> Using it, I was unable to find a pix_clock value such that the computation
> was successful. Most of the practical parameters result in a non-integer
> pix_clock, something that struct pll cannot represent.
> 
> The driver is not ready for submission at this point, but the changes to
> aptina_pll_calculate are reasonable on their own, which is why I submit them
> separately now.

This patch is very hard to review as you rewrite the whole, removing all the 
documentation for the existing algorithm, without documenting the new one. 
There's also no example of a failing case with the existing code that works 
with yours.

Could you please document the algorithm in details (especially explaining the 
background ideas), and share at least one use case, with with numbers for all 
the input and output parameters ?

> diff --git a/drivers/media/i2c/aptina-pll.c b/drivers/media/i2c/aptina-pll.c
> index 224ae4e4cf8b..249018772b2b 100644
> --- a/drivers/media/i2c/aptina-pll.c
> +++ b/drivers/media/i2c/aptina-pll.c
> @@ -2,6 +2,7 @@
>   * Aptina Sensor PLL Configuration
>   *
>   * Copyright (C) 2012 Laurent Pinchart 
> + * Copyright (C) 2018 Intenta GmbH
>   *
>   * This program is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU General Public License
> @@ -14,23 +15,84 @@
>   */
> 
>  #include 
> -#include 
>  #include 
> -#include 
> +#include 
>  #include 
> 
>  #include "aptina-pll.h"
> 
> +/* A variant of mult_frac that works even when (x % denom) * numer produces
> a + * 32bit overflow.
> + */
> +#define mult_frac_exact(x, numer, denom) \
> + ((u32)div_u64(mul_u32_u32((x), (numer)), (denom)))
> +
> +static unsigned int absdiff(unsigned int x, unsigned int y)
> +{
> + return x > y ? x - y : y - x;
> +}
> +
> +/* Find n_min <= *np <= n_max and d_min <= *dp <= d_max such that *np / *dp
> + * approximates n_target / d_target.
> + */
> +static int approximate_fraction(unsigned int n_min, unsigned int n_max,
> + unsigned int d_min, unsigned int d_max,
> + unsigned int n_target, unsigned int d_target,
> + unsigned int *np, unsigned int *dp)
> +{
> + unsigned int d, best_err = UINT_MAX;
> +
> + d_min = max(d_min, mult_frac_exact(n_min, d_target, n_target));
> + d_max = min(d_max, mult_frac_exact(n_max, d_target, n_target) + 1);
> + if (d_min > d_max)
> + return -EINVAL;
> +
> + for (d = d_min; d < d_max; ++d) {
> + unsigned int n = mult_frac_exact(d, n_target, d_target);
> +
> + if (n >= n_min) {
> + unsigned int err = absdiff(
> + n_target, mult_frac_exact(n, d_target, d));
> +
> + if (err < best_err) {
> + best_err = err;
> + *np = n;
> + *dp = d;
> + }
> + if (err == 0)
> + return 0;
> + }
> + ++n;
> + if (n <= n_max) {
> + unsigned int err = absdiff(
> + n_target, mult_frac_exact(n, d_target, d));
> +
> + if (err < best_err) {
> + best_err = err;
> + *np = n;
> + *dp = d;
> + }
> + }
> + }
> + return best_err == UINT_MAX ? -EINVAL : 0;
> +}
> +
> +/* Find parameters n, m, p1 such that:
> + *   n_min <= n <= n_max
> + *   m_min <= m <= m_max
> + *   p1_min <= p1 <= p1_max, even
> + *   int_clock_min <= ext_clock / n <= int_clock_max
> + *   out_clock_min <= ext_clock / n * m <= out_clock_max
> + *   pix_clock = ext_clock / n * m / p1 (approximately)
> + *   p1 is maximized
> + * Re

Re: [PATCH] media: aptina-pll: allow approximating the requested pix_clock

2018-08-23 Thread Sakari Ailus
Hi Helmut,

On Thu, Aug 23, 2018 at 09:52:09AM +0200, Helmut Grohne wrote:
> Clock frequencies are not exact values, but rather imprecise, physical
> properties. The present pll computation however, treats them as exact.
> It tries to compute parameters that attain the requested pix_clock
> exactly. Failing that, it gives up.
> 
> The new implementation approximates the requested pix_clock and reports
> the actual value resulting from the computed parameters. If the
> requested pix_clock can be attained, the original behaviour of
> maximizing p1 is retained.
> 
> Experiments with the algorithm in userspace on an arm device show that
> the computational cost is negligible compared to executing a binary for
> all practical inputs.
> 
> Signed-off-by: Helmut Grohne 
> ---
>  drivers/media/i2c/aptina-pll.c | 263 
> -
>  drivers/media/i2c/mt9m032.c|  15 ++-
>  2 files changed, 165 insertions(+), 113 deletions(-)
> 
> I tried using aptina_pll_calculate in a driver for a similar image sensor.
> Using it, I was unable to find a pix_clock value such that the computation was
> successful. Most of the practical parameters result in a non-integer 
> pix_clock,
> something that struct pll cannot represent.

Knowing the formula, the limits as well as the external clock frequency, it
should be relatively straightforward to come up with a functional pixel
clock value. Was there a practical problem in coming up with such a value?

You can't pick a value at random; it needs to be one that actually works.
The purpose of having an exact frequency is that the typical systems where
these devices are being used, as I explained earlier, is that having a
random frequency, albeit with which the sensor could possibly work, the
other devices in the system may not do so.

The importantce of the calculator is more apparent for devices that use
serial busses with a variable number of lanes etc. where the pixel format
affects the pixel rate whereas the link frequency stays unchanged.

> 
> The driver is not ready for submission at this point, but the changes to
> aptina_pll_calculate are reasonable on their own, which is why I submit them
> separately now.
> 
> Helmut
> 
> diff --git a/drivers/media/i2c/aptina-pll.c b/drivers/media/i2c/aptina-pll.c
> index 224ae4e4cf8b..249018772b2b 100644
> --- a/drivers/media/i2c/aptina-pll.c
> +++ b/drivers/media/i2c/aptina-pll.c
> @@ -2,6 +2,7 @@
>   * Aptina Sensor PLL Configuration
>   *
>   * Copyright (C) 2012 Laurent Pinchart 
> + * Copyright (C) 2018 Intenta GmbH
>   *
>   * This program is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU General Public License
> @@ -14,23 +15,84 @@
>   */
>  
>  #include 
> -#include 
>  #include 
> -#include 
> +#include 
>  #include 
>  
>  #include "aptina-pll.h"
>  
> +/* A variant of mult_frac that works even when (x % denom) * numer produces a
> + * 32bit overflow.
> + */
> +#define mult_frac_exact(x, numer, denom) \
> + ((u32)div_u64(mul_u32_u32((x), (numer)), (denom)))
> +
> +static unsigned int absdiff(unsigned int x, unsigned int y)
> +{
> + return x > y ? x - y : y - x;
> +}
> +
> +/* Find n_min <= *np <= n_max and d_min <= *dp <= d_max such that *np / *dp
> + * approximates n_target / d_target.
> + */
> +static int approximate_fraction(unsigned int n_min, unsigned int n_max,
> + unsigned int d_min, unsigned int d_max,
> + unsigned int n_target, unsigned int d_target,
> + unsigned int *np, unsigned int *dp)
> +{
> + unsigned int d, best_err = UINT_MAX;
> +
> + d_min = max(d_min, mult_frac_exact(n_min, d_target, n_target));
> + d_max = min(d_max, mult_frac_exact(n_max, d_target, n_target) + 1);
> + if (d_min > d_max)
> + return -EINVAL;
> +
> + for (d = d_min; d < d_max; ++d) {
> + unsigned int n = mult_frac_exact(d, n_target, d_target);
> +
> + if (n >= n_min) {
> + unsigned int err = absdiff(
> + n_target, mult_frac_exact(n, d_target, d));
> +
> + if (err < best_err) {
> + best_err = err;
> + *np = n;
> + *dp = d;
> + }
> + if (err == 0)
> + return 0;
> + }
> + ++n;
> + if (n <= n_max) {
> + unsigned int err = absdiff(
> + n_target, mult_frac_exact(n, d_target, d));
> +
> + if (err < best_err) {
> + best_err = err;
> + *np = n;
> + *dp = d;
> + }
> + }
> + }
> + return best_err == UINT_MAX ? -EINVAL : 0;
> +}
> +
> +/* Find parameters n, m, p1 such that:
> + *   n_min <= n <= n_max

[PATCH] video_function_calls.rst: drop obsolete video-set-attributes reference

2018-08-23 Thread Hans Verkuil
This fixes this warning:

Documentation/media/uapi/dvb/video_function_calls.rst:9: WARNING: toctree 
contains
reference to nonexisting document 'uapi/dvb/video-set-attributes'

Signed-off-by: Hans Verkuil 
---
diff --git a/Documentation/media/uapi/dvb/video_function_calls.rst 
b/Documentation/media/uapi/dvb/video_function_calls.rst
index 3f4f6c9ffad7..a4222b6cd2d3 100644
--- a/Documentation/media/uapi/dvb/video_function_calls.rst
+++ b/Documentation/media/uapi/dvb/video_function_calls.rst
@@ -33,4 +33,3 @@ Video Function Calls
 video-clear-buffer
 video-set-streamtype
 video-set-format
-video-set-attributes


Re: [RFC] Request API and V4L2 capabilities

2018-08-23 Thread Hans Verkuil
Hi all,

After reading through the comments I came to the following conclusions:

On 08/04/18 15:50, Hans Verkuil wrote:
> Hi all,
> 
> While the Request API patch series addresses all the core API issues, there
> are some high-level considerations as well:
> 
> 1) How can the application tell that the Request API is supported and for
>which buffer types (capture/output) and pixel formats?
> 
> 2) How can the application tell if the Request API is required as opposed to 
> being
>optional?
> 
> 3) Some controls may be required in each request, how to let userspace know 
> this?
>Is it even necessary to inform userspace?
> 
> 4) (For bonus points): How to let the application know which streaming I/O 
> modes
>are available? That's never been possible before, but it would be very nice
>indeed if that's made explicit.
> 
> Since the Request API associates data with frame buffers it makes sense to 
> expose
> this as a new capability field in struct v4l2_requestbuffers and struct 
> v4l2_create_buffers.
> 
> The first struct has 2 reserved fields, the second has 8, so it's not a 
> problem to
> take one for a capability field. Both structs also have a buffer type, so we 
> know
> if this is requested for a capture or output buffer type. The pixel format is 
> known
> in the driver, so HAS/REQUIRES_REQUESTS can be set based on that. I doubt 
> we'll have
> drivers where the request caps would actually depend on the pixel format, but 
> it
> theoretically possible. For both ioctls you can call them with count=0 at the 
> start
> of the application. REQBUFS has of course the side-effect of deleting all 
> buffers,
> but at the start of your application you don't have any yet. CREATE_BUFS has 
> no
> side-effects.
> 
> I propose adding these capabilities:
> 
> #define V4L2_BUF_CAP_HAS_REQUESTS 0x0001
> #define V4L2_BUF_CAP_REQUIRES_REQUESTS0x0002
> #define V4L2_BUF_CAP_HAS_MMAP 0x0100
> #define V4L2_BUF_CAP_HAS_USERPTR  0x0200
> #define V4L2_BUF_CAP_HAS_DMABUF   0x0400

I substituted SUPPORTS for HAS and dropped the REQUIRES_REQUESTS capability.
As Tomasz mentioned, technically (at least for stateless codecs) you could
handle just one frame at a time without using requests. It's very inefficient,
but it would work.

Otherwise I have implemented this as specified above.

> 
> If REQUIRES_REQUESTS is set, then HAS_REQUESTS is also set.
> 
> At this time I think that REQUIRES_REQUESTS would only need to be set for the
> output queue of stateless codecs.
> 
> If capabilities is 0, then it's from an old kernel and all you know is that
> requests are certainly not supported, and that MMAP is supported. Whether 
> USERPTR
> or DMABUF are supported isn't known in that case (just try it :-) ).
> 
> Strictly speaking we do not need these HAS_MMAP/USERPTR/DMABUF caps, but it 
> is very
> easy to add if we create a new capability field anyway, and it has always 
> annoyed
> the hell out of me that we didn't have a good way to let userspace know what
> streaming I/O modes we support. And with vb2 it's easy to implement.
> 
> Regarding point 3: I think this should be documented next to the pixel 
> format. I.e.
> the MPEG-2 Slice format used by the stateless cedrus codec requires the 
> request API
> and that two MPEG-2 controls (slice params and quantization matrices) must be 
> present
> in each request.

Everyone seemed to agree with this: which controls are required to be present 
in a
request should be documented as part of the corresponding pixel format.

> I am not sure a control flag (e.g. V4L2_CTRL_FLAG_REQUIRED_IN_REQ) is needed 
> here.

Nobody liked this, so this proposed flag is dropped.

Regards,

Hans

> It's really implied by the fact that you use a stateless codec. It doesn't 
> help
> generic applications like v4l2-ctl or qv4l2 either since in order to support
> stateless codecs they will have to know about the details of these controls 
> anyway.
> 
> So I am inclined to say that it is not necessary to expose this information in
> the API, but it has to be documented together with the pixel format 
> documentation.
> 
> Comments? Ideas?
> 
> Regards,
> 
>   Hans
> 



Re: [PATCH 4/4] venus: firmware: register separate platform_device for firmware loader

2018-08-23 Thread Vikash Garodia

Hi Stanimir,

On 2018-08-22 18:07, Stanimir Varbanov wrote:

Hi Vikash,

Could you give a try below patch on your environment? You should keep
your 1/4 to 3/4 patches and replace your 4/4 with the below one.

You have to drop the compatible string in firmware DT subnode (keep 
only

iommus).


I have successfully tested this patch on my environment without tz. I 
have

post a new series by including below patch. Thank you Stanimir for your
review and below patch.


On 08/22/2018 03:34 PM, Stanimir Varbanov wrote:

This registers a firmware platform_device and associate it with
video-firmware DT subnode. Then calls dma configure to initialize
dma and iommu.

Signed-off-by: Stanimir Varbanov 
---
 .../devicetree/bindings/media/qcom,venus.txt   | 13 +-
 drivers/media/platform/qcom/venus/core.c   | 14 +--
 drivers/media/platform/qcom/venus/firmware.c   | 49 
++

 drivers/media/platform/qcom/venus/firmware.h   |  2 +
 4 files changed, 73 insertions(+), 5 deletions(-)





Re: [RFC] Request API and V4L2 capabilities

2018-08-23 Thread Nicolas Dufresne
Le jeudi 23 août 2018 à 10:05 +0200, Paul Kocialkowski a écrit :
> Hi,
> 
> On Wed, 2018-08-22 at 14:33 -0300, Ezequiel Garcia wrote:
> > On Wed, 2018-08-22 at 16:10 +0200, Paul Kocialkowski wrote:
> > > Hi,
> > > 
> > > On Tue, 2018-08-21 at 17:52 +0900, Tomasz Figa wrote:
> > > > Hi Hans, Paul,
> > > > 
> > > > On Mon, Aug 6, 2018 at 6:29 PM Paul Kocialkowski
> > > >  wrote:
> > > > > 
> > > > > On Mon, 2018-08-06 at 11:23 +0200, Hans Verkuil wrote:
> > > > > > On 08/06/2018 11:13 AM, Paul Kocialkowski wrote:
> > > > > > > Hi,
> > > > > > > 
> > > > > > > On Mon, 2018-08-06 at 10:32 +0200, Hans Verkuil wrote:
> > > > > > > > On 08/06/2018 10:16 AM, Paul Kocialkowski wrote:
> > > > > > > > > On Sat, 2018-08-04 at 15:50 +0200, Hans Verkuil wrote:
> > > > > > > > > > Regarding point 3: I think this should be documented next 
> > > > > > > > > > to the pixel format. I.e.
> > > > > > > > > > the MPEG-2 Slice format used by the stateless cedrus codec 
> > > > > > > > > > requires the request API
> > > > > > > > > > and that two MPEG-2 controls (slice params and quantization 
> > > > > > > > > > matrices) must be present
> > > > > > > > > > in each request.
> > > > > > > > > > 
> > > > > > > > > > I am not sure a control flag (e.g. 
> > > > > > > > > > V4L2_CTRL_FLAG_REQUIRED_IN_REQ) is needed here.
> > > > > > > > > > It's really implied by the fact that you use a stateless 
> > > > > > > > > > codec. It doesn't help
> > > > > > > > > > generic applications like v4l2-ctl or qv4l2 either since in 
> > > > > > > > > > order to support
> > > > > > > > > > stateless codecs they will have to know about the details 
> > > > > > > > > > of these controls anyway.
> > > > > > > > > > 
> > > > > > > > > > So I am inclined to say that it is not necessary to expose 
> > > > > > > > > > this information in
> > > > > > > > > > the API, but it has to be documented together with the 
> > > > > > > > > > pixel format documentation.
> > > > > > > > > 
> > > > > > > > > I think this is affected by considerations about codec 
> > > > > > > > > profile/level
> > > > > > > > > support. More specifically, some controls will only be 
> > > > > > > > > required for
> > > > > > > > > supporting advanced codec profiles/levels, so they can only be
> > > > > > > > > explicitly marked with appropriate flags by the driver when 
> > > > > > > > > the target
> > > > > > > > > profile/level is known. And I don't think it would be sane 
> > > > > > > > > for userspace
> > > > > > > > > to explicitly set what profile/level it's aiming at. As a 
> > > > > > > > > result, I
> > > > > > > > > don't think we can explicitly mark controls as required or 
> > > > > > > > > optional.
> > > > 
> > > > I'm not sure this is entirely true. The hardware may need to be
> > > > explicitly told what profile the video is. It may even not be the
> > > > hardware, but the driver itself too, given that the profile may imply
> > > > the CAPTURE pixel format, e.g. for VP9 profiles:
> > > > 
> > > > profile 0
> > > > color depth: 8 bit/sample, chroma subsampling: 4:2:0
> > > > profile 1
> > > > color depth: 8 bit, chroma subsampling: 4:2:0, 4:2:2, 4:4:4
> > > > profile 2
> > > > color depth: 10–12 bit, chroma subsampling: 4:2:0
> > > > profile 3
> > > > color depth: 10–12 bit, chroma subsampling: 4:2:0, 4:2:2, 4:4:4
> > > > 
> > > > (reference: https://en.wikipedia.org/wiki/VP9#Profiles)
> > > 
> > > I think it would be fair to expect userspace to select the right
> > > destination format (and maybe have the driver error if there's a
> > > mismatch with the meta-data) instead of having the driver somewhat
> > > expose what format should be used.
> > > 
> > > But maybe this would be an API violation, since all the enumerated
> > > formats are probably supposed to be selectable?
> > > 
> > > We could also look at it the other way round and consider that selecting
> > > an exposed format is always legit, but that it implies passing a
> > > bitstream that matches it or the driver will error (because of an
> > > invalid bitstream passed, not because of a "wrong" selected format).
> > > 
> > 
> > The API requires the user to negotiate via TRY_FMT/S_FMT. The driver
> > usually does not return error on invalid formats, and simply return
> > a format it can work with. I think the kernel-user contract has to
> > guarantee if the driver accepted a given format, it won't fail to
> > encoder or decode.
> 
> Well, the issue here is that in order to correctly enumerate the
> formats, the driver needs to be aware of:
> 1. in what destination format the bitstream data is decoded to;

This is covered by the state-full specification patch if I remember
correctly. So the driver, if it's a multi-format, will first return all
possible formats, and later on, will return the proper subset. So let's
take an encoder, after setting the capture format, the enumeration of
the raw formats could then be limited to what is supported for this
output. For an H264 encoder, what could also affect this l

Re: [RFC] Request API and V4L2 capabilities

2018-08-23 Thread Nicolas Dufresne
Le jeudi 23 août 2018 à 16:31 +0200, Hans Verkuil a écrit :
> > I propose adding these capabilities:
> > 
> > #define V4L2_BUF_CAP_HAS_REQUESTS 0x0001
> > #define V4L2_BUF_CAP_REQUIRES_REQUESTS0x0002
> > #define V4L2_BUF_CAP_HAS_MMAP 0x0100
> > #define V4L2_BUF_CAP_HAS_USERPTR  0x0200
> > #define V4L2_BUF_CAP_HAS_DMABUF   0x0400
> 
> I substituted SUPPORTS for HAS and dropped the REQUIRES_REQUESTS capability.
> As Tomasz mentioned, technically (at least for stateless codecs) you could
> handle just one frame at a time without using requests. It's very inefficient,
> but it would work.

I thought the request was providing a data structure to refer back to
the frames, so each codec don't have to implement one. Do you mean that
the framework will implicitly request requests in that mode ? or simply
that there is no such helper ?


signature.asc
Description: This is a digitally signed message part


cron job: media_tree daily build: OK

2018-08-23 Thread Hans Verkuil
This message is generated daily by a cron job that builds media_tree for
the kernels and architectures in the list below.

Results of the daily build of media_tree:

date:   Fri Aug 24 05:00:15 CEST 2018
media-tree git hash:da2048b7348a0be92f706ac019e022139e29495e
media_build git hash:   baf45935ffad914f33faf751ad9f4d0dd276c021
v4l-utils git hash: 015ca7524748fa7cef296102c68b631b078b63c6
edid-decode git hash:   b2da1516df3cc2756bfe8d1fa06d7bf2562ba1f4
gcc version:i686-linux-gcc (GCC) 8.1.0
sparse version: 0.5.2
smatch version: 0.5.1
host hardware:  x86_64
host os:4.17.0-1-amd64

linux-git-arm-at91: OK
linux-git-arm-davinci: OK
linux-git-arm-multi: OK
linux-git-arm-pxa: OK
linux-git-arm-stm32: OK
linux-git-arm64: OK
linux-git-i686: OK
linux-git-mips: OK
linux-git-powerpc64: OK
linux-git-sh: OK
linux-git-x86_64: OK
Check COMPILE_TEST: OK
linux-2.6.36.4-i686: OK
linux-2.6.36.4-x86_64: OK
linux-2.6.37.6-i686: OK
linux-2.6.37.6-x86_64: OK
linux-2.6.38.8-i686: OK
linux-2.6.38.8-x86_64: OK
linux-2.6.39.4-i686: OK
linux-2.6.39.4-x86_64: OK
linux-3.0.101-i686: OK
linux-3.0.101-x86_64: OK
linux-3.1.10-i686: OK
linux-3.1.10-x86_64: OK
linux-3.2.102-i686: OK
linux-3.2.102-x86_64: OK
linux-3.3.8-i686: OK
linux-3.3.8-x86_64: OK
linux-3.4.113-i686: OK
linux-3.4.113-x86_64: OK
linux-3.5.7-i686: OK
linux-3.5.7-x86_64: OK
linux-3.6.11-i686: OK
linux-3.6.11-x86_64: OK
linux-3.7.10-i686: OK
linux-3.7.10-x86_64: OK
linux-3.8.13-i686: OK
linux-3.8.13-x86_64: OK
linux-3.9.11-i686: OK
linux-3.9.11-x86_64: OK
linux-3.10.108-i686: OK
linux-3.10.108-x86_64: OK
linux-3.11.10-i686: OK
linux-3.11.10-x86_64: OK
linux-3.12.74-i686: OK
linux-3.12.74-x86_64: OK
linux-3.13.11-i686: OK
linux-3.13.11-x86_64: OK
linux-3.14.79-i686: OK
linux-3.14.79-x86_64: OK
linux-3.15.10-i686: OK
linux-3.15.10-x86_64: OK
linux-3.16.57-i686: OK
linux-3.16.57-x86_64: OK
linux-3.17.8-i686: OK
linux-3.17.8-x86_64: OK
linux-3.18.115-i686: OK
linux-3.18.115-x86_64: OK
linux-3.19.8-i686: OK
linux-3.19.8-x86_64: OK
linux-4.18-i686: OK
linux-4.18-x86_64: OK
apps: OK
spec-git: OK
sparse: WARNINGS

Detailed results are available here:

http://www.xs4all.nl/~hverkuil/logs/Friday.log

Full logs are available here:

http://www.xs4all.nl/~hverkuil/logs/Friday.tar.bz2

The Media Infrastructure API from this daily build is here:

http://www.xs4all.nl/~hverkuil/spec/index.html