[RESEND v7 1/2] media: venus: Add codec data table

2019-09-23 Thread Aniket Masule
Add vpp cycles for different types of codec.
It indicates the cycles required by video hardware
to process each macroblock. Add vsp cycles, cycles
required by stream processor. Initialize the codec
data with core resources.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/core.c| 13 +
 drivers/media/platform/qcom/venus/core.h| 16 +++
 drivers/media/platform/qcom/venus/helpers.c | 30 +
 drivers/media/platform/qcom/venus/helpers.h |  1 +
 drivers/media/platform/qcom/venus/vdec.c|  4 
 drivers/media/platform/qcom/venus/venc.c|  4 
 6 files changed, 68 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index 19cbe9d..49d32b2 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -480,6 +480,17 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
{  244800, 1 }, /* 1920x1080@30 */
 };
 
+static struct codec_freq_data sdm845_codec_freq_data[] =  {
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 10 },
+};
+
 static const struct bw_tbl sdm845_bw_table_enc[] = {
{ 1944000, 1612000, 0, 2416000, 0 },/* 3840x2160@60 */
{  972000,  951000, 0, 1434000, 0 },/* 3840x2160@30 */
@@ -501,6 +512,8 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
.bw_tbl_enc_size = ARRAY_SIZE(sdm845_bw_table_enc),
.bw_tbl_dec = sdm845_bw_table_dec,
.bw_tbl_dec_size = ARRAY_SIZE(sdm845_bw_table_dec),
+   .codec_freq_data = sdm845_codec_freq_data,
+   .codec_freq_data_size = ARRAY_SIZE(sdm845_codec_freq_data),
.clks = {"core", "iface", "bus" },
.clks_num = 3,
.max_load = 3110400,/* 4096x2160@90 */
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 13e35f3..684a950 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -26,6 +26,13 @@ struct reg_val {
u32 value;
 };
 
+struct codec_freq_data {
+   u32 pixfmt;
+   u32 session_type;
+   unsigned long vpp_freq;
+   unsigned long vsp_freq;
+};
+
 struct bw_tbl {
u32 mbs_per_sec;
u32 avg;
@@ -44,6 +51,8 @@ struct venus_resources {
unsigned int bw_tbl_dec_size;
const struct reg_val *reg_tbl;
unsigned int reg_tbl_size;
+   const struct codec_freq_data *codec_freq_data;
+   unsigned int codec_freq_data_size;
const char * const clks[VIDC_CLKS_NUM_MAX];
unsigned int clks_num;
enum hfi_version hfi_version;
@@ -221,6 +230,12 @@ struct venus_buffer {
struct list_head ref_list;
 };
 
+struct clock_data {
+   u32 core_id;
+   unsigned long freq;
+   const struct codec_freq_data *codec_freq_data;
+};
+
 #define to_venus_buffer(ptr)   container_of(ptr, struct venus_buffer, vb)
 
 enum venus_dec_state {
@@ -301,6 +316,7 @@ struct venus_inst {
struct list_head list;
struct mutex lock;
struct venus_core *core;
+   struct clock_data clk_data;
struct list_head dpbbufs;
struct list_head internalbufs;
struct list_head registeredbufs;
diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 4320ea9..71af237 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -883,6 +883,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, 
u32 usage)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
 
+int venus_helper_init_codec_freq_data(struct venus_inst *inst)
+{
+   const struct codec_freq_data *data;
+   unsigned int i, data_size;
+   u32 pixfmt;
+   int ret = 0;
+
+   if (!IS_V4(inst->core))
+   return 0;
+
+   data = inst->core->res->codec_freq_data;
+   data_size = inst->core->res->codec_freq_data_size;
+   pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
+   inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
+
+   for (i = 0; i < data_size; i++) {
+   if (data[i].pixfmt == pixfmt &&
+   data[i].session_type == inst->session_type) {
+   inst->clk_data.codec_freq_data = &data[i];
+   break;
+   }
+   }
+
+   if (!

[RESEND v7 2/2] media: venus: Update clock scaling

2019-09-23 Thread Aniket Masule
Current clock scaling calculations are same for vpu4 and
previous versions. For vpu4, Clock scaling calculations
are updated with cycles/mb and bitrate. This helps in
getting precise clock required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 157 
 drivers/media/platform/qcom/venus/helpers.h |   2 +-
 drivers/media/platform/qcom/venus/vdec.c|   4 +-
 3 files changed, 140 insertions(+), 23 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 71af237..e8476f5 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -448,12 +448,32 @@ static int load_scale_bw(struct venus_core *core)
return icc_set_bw(core->video_path, total_avg, total_peak);
 }
 
-int venus_helper_load_scale_clocks(struct venus_core *core)
+static int set_clk_freq(struct venus_core *core, unsigned long freq)
 {
+   struct clk *clk = core->clks[0];
+   int ret;
+
+   ret = clk_set_rate(clk, freq);
+   if (ret)
+   return ret;
+
+   ret = clk_set_rate(core->core0_clk, freq);
+   if (ret)
+   return ret;
+
+   ret = clk_set_rate(core->core1_clk, freq);
+   if (ret)
+   return ret;
+
+   return 0;
+}
+
+static int scale_clocks(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
-   struct clk *clk = core->clks[0];
struct device *dev = core->dev;
u32 mbs_per_sec;
unsigned int i;
@@ -479,28 +499,123 @@ int venus_helper_load_scale_clocks(struct venus_core 
*core)
 
 set_freq:
 
-   ret = clk_set_rate(clk, freq);
-   if (ret)
-   goto err;
+   ret = set_clk_freq(core, freq);
+   if (ret) {
+   dev_err(dev, "failed to set clock rate %lu (%d)\n",
+   freq, ret);
+   return ret;
+   }
 
-   ret = clk_set_rate(core->core0_clk, freq);
-   if (ret)
-   goto err;
+   ret = load_scale_bw(core);
+   if (ret) {
+   dev_err(dev, "failed to set bandwidth (%d)\n",
+   ret);
+   return ret;
+   }
 
-   ret = clk_set_rate(core->core1_clk, freq);
-   if (ret)
-   goto err;
+   return 0;
+}
+
+static unsigned long calculate_inst_freq(struct venus_inst *inst,
+unsigned long filled_len)
+{
+   unsigned long vpp_freq = 0, vsp_freq = 0;
+   u64 fps = inst->fps;
+   u32 mbs_per_sec;
+
+   mbs_per_sec = load_per_instance(inst) / inst->fps;
+   vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+   /* 21 / 20 is overhead factor */
+   vpp_freq += vpp_freq / 20;
+   vsp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vsp_freq;
+
+   /* 10 / 7 is overhead factor */
+   if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+   vsp_freq += (inst->controls.enc.bitrate * 10) / 7;
+   else
+   vsp_freq += ((fps * filled_len * 8) * 10) / 7;
+
+   return max(vpp_freq, vsp_freq);
+}
+
+static int scale_clocks_v4(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
+   const struct freq_tbl *table = core->res->freq_tbl;
+   unsigned int num_rows = core->res->freq_tbl_size;
+   struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
+   struct device *dev = core->dev;
+   unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+   unsigned long filled_len = 0;
+   struct venus_buffer *buf, *n;
+   struct vb2_buffer *vb;
+   int i, ret;
+
+   v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
+   vb = &buf->vb.vb2_buf;
+   filled_len = max(filled_len, vb2_get_plane_payload(vb, 0));
+   }
+
+   if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len)
+   return 0;
+
+   freq = calculate_inst_freq(inst, filled_len);
+   inst->clk_data.freq = freq;
+
+   mutex_lock(&core->lock);
+   list_for_each_entry(inst, &core->instances, list) {
+   if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+   freq_core1 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+   freq_core2 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+   freq_core1 += inst->clk_data.freq;
+   freq_core2 += inst->clk_data.freq;
+   }
+   }
+   mutex_unlock(&core->lock);
+
+   freq = max(freq_core1, freq_core2

[RESEND v7 0/2] media: venus: Update clock scaling

2019-09-23 Thread Aniket Masule
In this patch series, clock scaling and core selection methods are
updated. Current clock scaling is same for vpu4 and previous versions.
Introducing load calculations using vpp cycles, which indicates the
cycles required by video hardware to process each macroblock. Also
adding vsp cycles, cycles required by stream processor. Clock scaling
is now done more precisely using vpp and vsp cycles.
Removing core selection from this series, there will be separate patch
once issue related to power domain is fixed.

This patch depends on the following patch:
https://patchwork.kernel.org/patch/11142557/ - Venus interconnect support for 
sdm845

Changes since v6:
 - Removed core selection.
 - Corrected frequency calculations.
 - Removed instance lock used while iterating over buffers.
 
Changes since v5:
 - Corrected load_per_core calculations.

Changes since v4:
 - Added call to load_scale_clocks from venus_helper_vb2_buf_queue.
 - Modified check to match core_id in core_selection.

Changes since v3:
 - vsp_cycles and vpp_cycles are now unsigned long.
 - Core number counting aligned with VIDC_CORE_ID_.
 - Aligned hardware overload handling of scale_clocks_v4 with scale_clocks.
 - Added bitrate based clock scaling patch in this patch series.
 - Instance state check is now moved from scale_clocks to load_scale_clocks

Aniket Masule (2):
  media: venus: Add codec data table
  media: venus: Update clock scaling

 drivers/media/platform/qcom/venus/core.c|  13 ++
 drivers/media/platform/qcom/venus/core.h|  16 +++
 drivers/media/platform/qcom/venus/helpers.c | 187 +---
 drivers/media/platform/qcom/venus/helpers.h |   3 +-
 drivers/media/platform/qcom/venus/vdec.c|   8 +-
 drivers/media/platform/qcom/venus/venc.c|   4 +
 6 files changed, 208 insertions(+), 23 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v7 0/3] media: venus: Update clock scaling

2019-08-27 Thread Aniket Masule
In this patch series, clock scaling and core selection methods are
updated. Current clock scaling is same for vpu4 and previous versions.
Introducing load calculations using vpp cycles, which indicates the
cycles required by video hardware to process each macroblock. Also
adding vsp cycles, cycles required by stream processor. Clock scaling
is now done more precisely using vpp and vsp cycles.
Removing core selection from this series, there will be separate patch
once issue related to power domain is fixed.

This patch depends on the following patches:
https://lore.kernel.org/patchwork/patch/1114762/ - Venus interconnect support 
for sdm845
https://lore.kernel.org/patchwork/patch/1114761/ - Venus interconnect support 
for sdm845

Changes since v6:
 - Removed core selection.
 - Corrected frequency calculations.
 - Removed instance lock used while iterating over buffers.
 
Changes since v5:
 - Corrected load_per_core calculations.

Changes since v4:
 - Added call to load_scale_clocks from venus_helper_vb2_buf_queue.
 - Modified check to match core_id in core_selection.

Changes since v3:
 - vsp_cycles and vpp_cycles are now unsigned long.
 - Core number counting aligned with VIDC_CORE_ID_.
 - Aligned hardware overload handling of scale_clocks_v4 with scale_clocks.
 - Added bitrate based clock scaling patch in this patch series.
 - Instance state check is now moved from scale_clocks to load_scale_clocks

Aniket Masule (3):
  media: venus: Add codec data table
  media: venus: Update clock scaling
  media: venus: Update to bitrate based clock scaling

 drivers/media/platform/qcom/venus/core.c|  13 ++
 drivers/media/platform/qcom/venus/core.h|  16 +++
 drivers/media/platform/qcom/venus/helpers.c | 188 +---
 drivers/media/platform/qcom/venus/helpers.h |   3 +-
 drivers/media/platform/qcom/venus/vdec.c|   8 +-
 drivers/media/platform/qcom/venus/venc.c|   4 +
 6 files changed, 209 insertions(+), 23 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v7 3/3] media: venus: Update to bitrate based clock scaling

2019-08-27 Thread Aniket Masule
Introduced clock scaling using bitrate, previous
calculations consider only the cycles per mb.
Also, clock scaling is now triggered before every
buffer being queued to the device. This helps in
deciding precise clock cycles required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 33 -
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 4ed630b..8fee0ef 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -516,17 +516,26 @@ static int scale_clocks(struct venus_inst *inst)
return 0;
 }
 
-static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+static unsigned long calculate_inst_freq(struct venus_inst *inst,
+unsigned long filled_len)
 {
-   unsigned long vpp_freq = 0;
+   unsigned long vpp_freq = 0, vsp_freq = 0;
+   u64 fps = inst->fps;
u32 mbs_per_sec;
 
-   mbs_per_sec = load_per_instance(inst);
+   mbs_per_sec = load_per_instance(inst) / inst->fps;
vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
/* 21 / 20 is overhead factor */
vpp_freq += vpp_freq / 20;
+   vsp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vsp_freq;
 
-   return vpp_freq;
+   /* 10 / 7 is overhead factor */
+   if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+   vsp_freq += (inst->controls.enc.bitrate * 10) / 7;
+   else
+   vsp_freq += ((fps * filled_len * 8) * 10) / 7;
+
+   return max(vpp_freq, vsp_freq);
 }
 
 static int scale_clocks_v4(struct venus_inst *inst)
@@ -534,12 +543,24 @@ static int scale_clocks_v4(struct venus_inst *inst)
struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
+   struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
struct device *dev = core->dev;
unsigned int i;
unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+   unsigned long filled_len = 0;
+   struct venus_buffer *buf, *n;
+   struct vb2_buffer *vb;
int ret;
 
-   freq = calculate_vpp_freq(inst);
+   v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
+   vb = &buf->vb.vb2_buf;
+   filled_len = max(filled_len, vb2_get_plane_payload(vb, 0));
+   }
+
+   if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len)
+   return 0;
+
+   freq = calculate_inst_freq(inst, filled_len);
inst->clk_data.freq = freq;
 
mutex_lock(&core->lock);
@@ -701,6 +722,8 @@ void venus_helper_get_ts_metadata(struct venus_inst *inst, 
u64 timestamp_us,
 
if (inst->session_type == VIDC_SESSION_TYPE_DEC)
put_ts_metadata(inst, vbuf);
+
+   venus_helper_load_scale_clocks(inst);
} else if (type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
if (inst->session_type == VIDC_SESSION_TYPE_ENC)
fdata.buffer_type = HFI_BUFFER_OUTPUT;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v7 2/3] media: venus: Update clock scaling

2019-08-27 Thread Aniket Masule
Current clock scaling calculations are same for vpu4 and
previous versions. For vpu4, Clock scaling calculations
are updated with cycles/mb. This helps in getting precise
clock required

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 135 +++-
 drivers/media/platform/qcom/venus/helpers.h |   2 +-
 drivers/media/platform/qcom/venus/vdec.c|   4 +-
 3 files changed, 118 insertions(+), 23 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 71af237..4ed630b 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -448,12 +448,32 @@ static int load_scale_bw(struct venus_core *core)
return icc_set_bw(core->video_path, total_avg, total_peak);
 }
 
-int venus_helper_load_scale_clocks(struct venus_core *core)
+static int set_clk_freq(struct venus_core *core, unsigned long freq)
 {
+   struct clk *clk = core->clks[0];
+   int ret;
+
+   ret = clk_set_rate(clk, freq);
+   if (ret)
+   return ret;
+
+   ret = clk_set_rate(core->core0_clk, freq);
+   if (ret)
+   return ret;
+
+   ret = clk_set_rate(core->core1_clk, freq);
+   if (ret)
+   return ret;
+
+   return 0;
+}
+
+static int scale_clocks(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
-   struct clk *clk = core->clks[0];
struct device *dev = core->dev;
u32 mbs_per_sec;
unsigned int i;
@@ -479,28 +499,103 @@ int venus_helper_load_scale_clocks(struct venus_core 
*core)
 
 set_freq:
 
-   ret = clk_set_rate(clk, freq);
-   if (ret)
-   goto err;
+   ret = set_clk_freq(core, freq);
+   if (ret) {
+   dev_err(dev, "failed to set clock rate %lu (%d)\n",
+   freq, ret);
+   return ret;
+   }
 
-   ret = clk_set_rate(core->core0_clk, freq);
-   if (ret)
-   goto err;
+   ret = load_scale_bw(core);
+   if (ret) {
+   dev_err(dev, "failed to set bandwidth (%d)\n",
+   ret);
+   return ret;
+   }
 
-   ret = clk_set_rate(core->core1_clk, freq);
-   if (ret)
-   goto err;
+   return 0;
+}
+
+static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+{
+   unsigned long vpp_freq = 0;
+   u32 mbs_per_sec;
+
+   mbs_per_sec = load_per_instance(inst);
+   vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+   /* 21 / 20 is overhead factor */
+   vpp_freq += vpp_freq / 20;
+
+   return vpp_freq;
+}
+
+static int scale_clocks_v4(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
+   const struct freq_tbl *table = core->res->freq_tbl;
+   unsigned int num_rows = core->res->freq_tbl_size;
+   struct device *dev = core->dev;
+   unsigned int i;
+   unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+   int ret;
+
+   freq = calculate_vpp_freq(inst);
+   inst->clk_data.freq = freq;
+
+   mutex_lock(&core->lock);
+   list_for_each_entry(inst, &core->instances, list) {
+   if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+   freq_core1 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+   freq_core2 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+   freq_core1 += inst->clk_data.freq;
+   freq_core2 += inst->clk_data.freq;
+   }
+   }
+   mutex_unlock(&core->lock);
+
+   freq = max(freq_core1, freq_core2);
+
+   if (freq >= table[0].freq) {
+   freq = table[0].freq;
+   dev_warn(dev, "HW is overloaded, needed: %lu max: %lu\n",
+freq, table[0].freq);
+   goto set_freq;
+   }
+
+   for (i = num_rows - 1 ; i >= 0; i--) {
+   if (freq <= table[i].freq) {
+   freq = table[i].freq;
+   break;
+   }
+   }
+
+set_freq:
+
+   ret = set_clk_freq(core, freq);
+   if (ret) {
+   dev_err(dev, "failed to set clock rate %lu (%d)\n",
+   freq, ret);
+   return ret;
+   }
 
ret = load_scale_bw(core);
-   if (ret)
-   goto err;
+   if (ret) {
+   dev_err(dev, "failed to set bandwidth (%d)\n",
+   ret);
+   return ret;
+   }
 
return 0;
+}
 
-err:
-   

[PATCH v7 1/3] media: venus: Add codec data table

2019-08-27 Thread Aniket Masule
Add vpp cycles for different types of codec.
It indicates the cycles required by video hardware
to process each macroblock. Add vsp cycles, cycles
required by stream processor. Initialize the codec
data with core resources.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/core.c| 13 +
 drivers/media/platform/qcom/venus/core.h| 16 +++
 drivers/media/platform/qcom/venus/helpers.c | 30 +
 drivers/media/platform/qcom/venus/helpers.h |  1 +
 drivers/media/platform/qcom/venus/vdec.c|  4 
 drivers/media/platform/qcom/venus/venc.c|  4 
 6 files changed, 68 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index 19cbe9d..49d32b2 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -480,6 +480,17 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
{  244800, 1 }, /* 1920x1080@30 */
 };
 
+static struct codec_freq_data sdm845_codec_freq_data[] =  {
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 10 },
+};
+
 static const struct bw_tbl sdm845_bw_table_enc[] = {
{ 1944000, 1612000, 0, 2416000, 0 },/* 3840x2160@60 */
{  972000,  951000, 0, 1434000, 0 },/* 3840x2160@30 */
@@ -501,6 +512,8 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
.bw_tbl_enc_size = ARRAY_SIZE(sdm845_bw_table_enc),
.bw_tbl_dec = sdm845_bw_table_dec,
.bw_tbl_dec_size = ARRAY_SIZE(sdm845_bw_table_dec),
+   .codec_freq_data = sdm845_codec_freq_data,
+   .codec_freq_data_size = ARRAY_SIZE(sdm845_codec_freq_data),
.clks = {"core", "iface", "bus" },
.clks_num = 3,
.max_load = 3110400,/* 4096x2160@90 */
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 13e35f3..684a950 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -26,6 +26,13 @@ struct reg_val {
u32 value;
 };
 
+struct codec_freq_data {
+   u32 pixfmt;
+   u32 session_type;
+   unsigned long vpp_freq;
+   unsigned long vsp_freq;
+};
+
 struct bw_tbl {
u32 mbs_per_sec;
u32 avg;
@@ -44,6 +51,8 @@ struct venus_resources {
unsigned int bw_tbl_dec_size;
const struct reg_val *reg_tbl;
unsigned int reg_tbl_size;
+   const struct codec_freq_data *codec_freq_data;
+   unsigned int codec_freq_data_size;
const char * const clks[VIDC_CLKS_NUM_MAX];
unsigned int clks_num;
enum hfi_version hfi_version;
@@ -221,6 +230,12 @@ struct venus_buffer {
struct list_head ref_list;
 };
 
+struct clock_data {
+   u32 core_id;
+   unsigned long freq;
+   const struct codec_freq_data *codec_freq_data;
+};
+
 #define to_venus_buffer(ptr)   container_of(ptr, struct venus_buffer, vb)
 
 enum venus_dec_state {
@@ -301,6 +316,7 @@ struct venus_inst {
struct list_head list;
struct mutex lock;
struct venus_core *core;
+   struct clock_data clk_data;
struct list_head dpbbufs;
struct list_head internalbufs;
struct list_head registeredbufs;
diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 4320ea9..71af237 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -883,6 +883,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, 
u32 usage)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
 
+int venus_helper_init_codec_freq_data(struct venus_inst *inst)
+{
+   const struct codec_freq_data *data;
+   unsigned int i, data_size;
+   u32 pixfmt;
+   int ret = 0;
+
+   if (!IS_V4(inst->core))
+   return 0;
+
+   data = inst->core->res->codec_freq_data;
+   data_size = inst->core->res->codec_freq_data_size;
+   pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
+   inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
+
+   for (i = 0; i < data_size; i++) {
+   if (data[i].pixfmt == pixfmt &&
+   data[i].session_type == inst->session_type) {
+   inst->clk_data.codec_freq_data = &data[i];
+   break;
+   }
+   }
+
+   if (!

[PATCH 0/3] media: venus: Update clock scaling

2019-08-27 Thread Aniket Masule
In this patch series, clock scaling and core selection methods are
updated. Current clock scaling is same for vpu4 and previous versions.
Introducing load calculations using vpp cycles, which indicates the
cycles required by video hardware to process each macroblock. Also
adding vsp cycles, cycles required by stream processor. Clock scaling
is now done more precisely using vpp and vsp cycles.
Removing core selection from this series, there will be separate patch
once issue related to power domain is fixed.

This patch depends on the following patches:
https://lore.kernel.org/patchwork/patch/1114762/ - Venus interconnect support 
for sdm845
https://lore.kernel.org/patchwork/patch/1114761/ - Venus interconnect support 
for sdm845

Changes since v6:
 - Removed core selection.
 - Corrected frequency calculations.
 - Removed instance lock used while iterating over buffers.
 
Changes since v5:
 - Corrected load_per_core calculations.

Changes since v4:
 - Added call to load_scale_clocks from venus_helper_vb2_buf_queue.
 - Modified check to match core_id in core_selection.

Changes since v3:
 - vsp_cycles and vpp_cycles are now unsigned long.
 - Core number counting aligned with VIDC_CORE_ID_.
 - Aligned hardware overload handling of scale_clocks_v4 with scale_clocks.
 - Added bitrate based clock scaling patch in this patch series.
 - Instance state check is now moved from scale_clocks to load_scale_clocks.

Aniket Masule (3):
  media: venus: Add codec data table
  media: venus: Update clock scaling
  media: venus: Update to bitrate based clock scaling

 drivers/media/platform/qcom/venus/core.c|  13 ++
 drivers/media/platform/qcom/venus/core.h|  16 +++
 drivers/media/platform/qcom/venus/helpers.c | 188 +---
 drivers/media/platform/qcom/venus/helpers.h |   3 +-
 drivers/media/platform/qcom/venus/vdec.c|   8 +-
 drivers/media/platform/qcom/venus/venc.c|   4 +
 6 files changed, 209 insertions(+), 23 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 1/3] media: venus: Add codec data table

2019-08-27 Thread Aniket Masule
Add vpp cycles for different types of codec.
It indicates the cycles required by video hardware
to process each macroblock. Add vsp cycles, cycles
required by stream processor. Initialize the codec
data with core resources.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/core.c| 13 +
 drivers/media/platform/qcom/venus/core.h| 16 +++
 drivers/media/platform/qcom/venus/helpers.c | 30 +
 drivers/media/platform/qcom/venus/helpers.h |  1 +
 drivers/media/platform/qcom/venus/vdec.c|  4 
 drivers/media/platform/qcom/venus/venc.c|  4 
 6 files changed, 68 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index 19cbe9d..49d32b2 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -480,6 +480,17 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
{  244800, 1 }, /* 1920x1080@30 */
 };
 
+static struct codec_freq_data sdm845_codec_freq_data[] =  {
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 10 },
+};
+
 static const struct bw_tbl sdm845_bw_table_enc[] = {
{ 1944000, 1612000, 0, 2416000, 0 },/* 3840x2160@60 */
{  972000,  951000, 0, 1434000, 0 },/* 3840x2160@30 */
@@ -501,6 +512,8 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
.bw_tbl_enc_size = ARRAY_SIZE(sdm845_bw_table_enc),
.bw_tbl_dec = sdm845_bw_table_dec,
.bw_tbl_dec_size = ARRAY_SIZE(sdm845_bw_table_dec),
+   .codec_freq_data = sdm845_codec_freq_data,
+   .codec_freq_data_size = ARRAY_SIZE(sdm845_codec_freq_data),
.clks = {"core", "iface", "bus" },
.clks_num = 3,
.max_load = 3110400,/* 4096x2160@90 */
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 13e35f3..684a950 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -26,6 +26,13 @@ struct reg_val {
u32 value;
 };
 
+struct codec_freq_data {
+   u32 pixfmt;
+   u32 session_type;
+   unsigned long vpp_freq;
+   unsigned long vsp_freq;
+};
+
 struct bw_tbl {
u32 mbs_per_sec;
u32 avg;
@@ -44,6 +51,8 @@ struct venus_resources {
unsigned int bw_tbl_dec_size;
const struct reg_val *reg_tbl;
unsigned int reg_tbl_size;
+   const struct codec_freq_data *codec_freq_data;
+   unsigned int codec_freq_data_size;
const char * const clks[VIDC_CLKS_NUM_MAX];
unsigned int clks_num;
enum hfi_version hfi_version;
@@ -221,6 +230,12 @@ struct venus_buffer {
struct list_head ref_list;
 };
 
+struct clock_data {
+   u32 core_id;
+   unsigned long freq;
+   const struct codec_freq_data *codec_freq_data;
+};
+
 #define to_venus_buffer(ptr)   container_of(ptr, struct venus_buffer, vb)
 
 enum venus_dec_state {
@@ -301,6 +316,7 @@ struct venus_inst {
struct list_head list;
struct mutex lock;
struct venus_core *core;
+   struct clock_data clk_data;
struct list_head dpbbufs;
struct list_head internalbufs;
struct list_head registeredbufs;
diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 4320ea9..71af237 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -883,6 +883,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, 
u32 usage)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
 
+int venus_helper_init_codec_freq_data(struct venus_inst *inst)
+{
+   const struct codec_freq_data *data;
+   unsigned int i, data_size;
+   u32 pixfmt;
+   int ret = 0;
+
+   if (!IS_V4(inst->core))
+   return 0;
+
+   data = inst->core->res->codec_freq_data;
+   data_size = inst->core->res->codec_freq_data_size;
+   pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
+   inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
+
+   for (i = 0; i < data_size; i++) {
+   if (data[i].pixfmt == pixfmt &&
+   data[i].session_type == inst->session_type) {
+   inst->clk_data.codec_freq_data = &data[i];
+   break;
+   }
+   }
+
+   if (!

[PATCH 2/3] media: venus: Update clock scaling

2019-08-27 Thread Aniket Masule
Current clock scaling calculations are same for vpu4 and
previous versions. For vpu4, Clock scaling calculations
are updated with cycles/mb. This helps in getting precise
clock required

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 135 +++-
 drivers/media/platform/qcom/venus/helpers.h |   2 +-
 drivers/media/platform/qcom/venus/vdec.c|   4 +-
 3 files changed, 118 insertions(+), 23 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 71af237..4ed630b 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -448,12 +448,32 @@ static int load_scale_bw(struct venus_core *core)
return icc_set_bw(core->video_path, total_avg, total_peak);
 }
 
-int venus_helper_load_scale_clocks(struct venus_core *core)
+static int set_clk_freq(struct venus_core *core, unsigned long freq)
 {
+   struct clk *clk = core->clks[0];
+   int ret;
+
+   ret = clk_set_rate(clk, freq);
+   if (ret)
+   return ret;
+
+   ret = clk_set_rate(core->core0_clk, freq);
+   if (ret)
+   return ret;
+
+   ret = clk_set_rate(core->core1_clk, freq);
+   if (ret)
+   return ret;
+
+   return 0;
+}
+
+static int scale_clocks(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
-   struct clk *clk = core->clks[0];
struct device *dev = core->dev;
u32 mbs_per_sec;
unsigned int i;
@@ -479,28 +499,103 @@ int venus_helper_load_scale_clocks(struct venus_core 
*core)
 
 set_freq:
 
-   ret = clk_set_rate(clk, freq);
-   if (ret)
-   goto err;
+   ret = set_clk_freq(core, freq);
+   if (ret) {
+   dev_err(dev, "failed to set clock rate %lu (%d)\n",
+   freq, ret);
+   return ret;
+   }
 
-   ret = clk_set_rate(core->core0_clk, freq);
-   if (ret)
-   goto err;
+   ret = load_scale_bw(core);
+   if (ret) {
+   dev_err(dev, "failed to set bandwidth (%d)\n",
+   ret);
+   return ret;
+   }
 
-   ret = clk_set_rate(core->core1_clk, freq);
-   if (ret)
-   goto err;
+   return 0;
+}
+
+static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+{
+   unsigned long vpp_freq = 0;
+   u32 mbs_per_sec;
+
+   mbs_per_sec = load_per_instance(inst);
+   vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+   /* 21 / 20 is overhead factor */
+   vpp_freq += vpp_freq / 20;
+
+   return vpp_freq;
+}
+
+static int scale_clocks_v4(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
+   const struct freq_tbl *table = core->res->freq_tbl;
+   unsigned int num_rows = core->res->freq_tbl_size;
+   struct device *dev = core->dev;
+   unsigned int i;
+   unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+   int ret;
+
+   freq = calculate_vpp_freq(inst);
+   inst->clk_data.freq = freq;
+
+   mutex_lock(>lock);
+   list_for_each_entry(inst, >instances, list) {
+   if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+   freq_core1 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+   freq_core2 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+   freq_core1 += inst->clk_data.freq;
+   freq_core2 += inst->clk_data.freq;
+   }
+   }
+   mutex_unlock(>lock);
+
+   freq = max(freq_core1, freq_core2);
+
+   if (freq >= table[0].freq) {
+   freq = table[0].freq;
+   dev_warn(dev, "HW is overloaded, needed: %lu max: %lu\n",
+freq, table[0].freq);
+   goto set_freq;
+   }
+
+   for (i = num_rows - 1 ; i >= 0; i--) {
+   if (freq <= table[i].freq) {
+   freq = table[i].freq;
+   break;
+   }
+   }
+
+set_freq:
+
+   ret = set_clk_freq(core, freq);
+   if (ret) {
+   dev_err(dev, "failed to set clock rate %lu (%d)\n",
+   freq, ret);
+   return ret;
+   }
 
ret = load_scale_bw(core);
-   if (ret)
-   goto err;
+   if (ret) {
+   dev_err(dev, "failed to set bandwidth (%d)\n",
+   ret);
+   return ret;
+   }
 
return 0;
+}
 
-err:
-   

[PATCH 3/3] media: venus: Update to bitrate based clock scaling

2019-08-27 Thread Aniket Masule
Introduced clock scaling using bitrate, previous
calculations consider only the cycles per mb.
Also, clock scaling is now triggered before every
buffer being queued to the device. This helps in
deciding precise clock cycles required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 33 -
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 4ed630b..8fee0ef 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -516,17 +516,26 @@ static int scale_clocks(struct venus_inst *inst)
return 0;
 }
 
-static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+static unsigned long calculate_inst_freq(struct venus_inst *inst,
+unsigned long filled_len)
 {
-   unsigned long vpp_freq = 0;
+   unsigned long vpp_freq = 0, vsp_freq = 0;
+   u64 fps = inst->fps;
u32 mbs_per_sec;
 
-   mbs_per_sec = load_per_instance(inst);
+   mbs_per_sec = load_per_instance(inst) / inst->fps;
vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
/* 21 / 20 is overhead factor */
vpp_freq += vpp_freq / 20;
+   vsp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vsp_freq;
 
-   return vpp_freq;
+   /* 10 / 7 is overhead factor */
+   if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+   vsp_freq += (inst->controls.enc.bitrate * 10) / 7;
+   else
+   vsp_freq += ((fps * filled_len * 8) * 10) / 7;
+
+   return max(vpp_freq, vsp_freq);
 }
 
 static int scale_clocks_v4(struct venus_inst *inst)
@@ -534,12 +543,24 @@ static int scale_clocks_v4(struct venus_inst *inst)
struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
+   struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
struct device *dev = core->dev;
unsigned int i;
unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+   unsigned long filled_len = 0;
+   struct venus_buffer *buf, *n;
+   struct vb2_buffer *vb;
int ret;
 
-   freq = calculate_vpp_freq(inst);
+   v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
+   vb = >vb.vb2_buf;
+   filled_len = max(filled_len, vb2_get_plane_payload(vb, 0));
+   }
+
+   if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len)
+   return 0;
+
+   freq = calculate_inst_freq(inst, filled_len);
inst->clk_data.freq = freq;
 
mutex_lock(>lock);
@@ -701,6 +722,8 @@ void venus_helper_get_ts_metadata(struct venus_inst *inst, 
u64 timestamp_us,
 
if (inst->session_type == VIDC_SESSION_TYPE_DEC)
put_ts_metadata(inst, vbuf);
+
+   venus_helper_load_scale_clocks(inst);
} else if (type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
if (inst->session_type == VIDC_SESSION_TYPE_ENC)
fdata.buffer_type = HFI_BUFFER_OUTPUT;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v6 2/4] media: venus: Update clock scaling

2019-07-22 Thread Aniket Masule
Current clock scaling calculations are same for vpu4 and
previous versions. For vpu4, Clock scaling calculations
are updated with cycles/mb. This helps in getting precise
clock required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 91 +++--
 1 file changed, 87 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 7492373..2c976e4 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core, u32 
session_type)
return mbs_per_sec;
 }
 
-static int load_scale_clocks(struct venus_core *core)
+static int scale_clocks(struct venus_inst *inst)
 {
+   struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
@@ -398,6 +399,89 @@ static int load_scale_clocks(struct venus_core *core)
return ret;
 }
 
+static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+{
+   unsigned long vpp_freq = 0;
+   u32 mbs_per_sec;
+
+   mbs_per_sec = load_per_instance(inst);
+   vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+   /* 21 / 20 is overhead factor */
+   vpp_freq += vpp_freq / 20;
+
+   return vpp_freq;
+}
+
+static int scale_clocks_v4(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
+   const struct freq_tbl *table = core->res->freq_tbl;
+   unsigned int num_rows = core->res->freq_tbl_size;
+   struct clk *clk = core->clks[0];
+   struct device *dev = core->dev;
+   unsigned int i;
+   unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+   int ret;
+
+   freq = calculate_vpp_freq(inst);
+
+   if (freq > table[0].freq)
+   dev_warn(dev, "HW is overloaded, needed: %lu max: %lu\n",
+freq, table[0].freq);
+
+   for (i = 0; i < num_rows; i++) {
+   if (freq > table[i].freq)
+   break;
+   freq = table[i].freq;
+   }
+
+   inst->clk_data.freq = freq;
+
+   mutex_lock(>lock);
+   list_for_each_entry(inst, >instances, list) {
+   if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+   freq_core1 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+   freq_core2 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+   freq_core1 += inst->clk_data.freq;
+   freq_core2 += inst->clk_data.freq;
+   }
+   }
+   mutex_unlock(>lock);
+
+   freq = max(freq_core1, freq_core2);
+
+   ret = clk_set_rate(clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core0_clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core1_clk, freq);
+   if (ret)
+   goto err;
+
+   return 0;
+
+err:
+   dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
+   return ret;
+}
+
+static int load_scale_clocks(struct venus_inst *inst)
+{
+   if (IS_V4(inst->core))
+   return scale_clocks_v4(inst);
+
+   if (inst->state == INST_START)
+   return 0;
+
+   return scale_clocks(inst);
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -1053,7 +1137,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
 
venus_helper_free_dpb_bufs(inst);
 
-   load_scale_clocks(core);
+   load_scale_clocks(inst);
INIT_LIST_HEAD(>registeredbufs);
}
 
@@ -1070,7 +1154,6 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
 
 int venus_helper_vb2_start_streaming(struct venus_inst *inst)
 {
-   struct venus_core *core = inst->core;
int ret;
 
ret = intbufs_alloc(inst);
@@ -1081,7 +1164,7 @@ int venus_helper_vb2_start_streaming(struct venus_inst 
*inst)
if (ret)
goto err_bufs_free;
 
-   load_scale_clocks(core);
+   load_scale_clocks(inst);
 
ret = hfi_session_load_res(inst);
if (ret)
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v6 1/4] media: venus: Add codec data table

2019-07-22 Thread Aniket Masule
Add vpp cycles for different types of codec.
It indicates the cycles required by video hardware
to process each macroblock. Add vsp cycles, cycles
required by stream processor. Initialize the codec
data with core resources.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/core.c| 13 +
 drivers/media/platform/qcom/venus/core.h| 16 +++
 drivers/media/platform/qcom/venus/helpers.c | 30 +
 drivers/media/platform/qcom/venus/helpers.h |  1 +
 drivers/media/platform/qcom/venus/vdec.c|  4 
 drivers/media/platform/qcom/venus/venc.c|  4 
 6 files changed, 68 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index 7393667..ad6bb74 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -473,9 +473,22 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
{  244800, 1 }, /* 1920x1080@30 */
 };
 
+static struct codec_freq_data sdm845_codec_freq_data[] =  {
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 10 },
+};
+
 static const struct venus_resources sdm845_res = {
.freq_tbl = sdm845_freq_table,
.freq_tbl_size = ARRAY_SIZE(sdm845_freq_table),
+   .codec_freq_data = sdm845_codec_freq_data,
+   .codec_freq_data_size = ARRAY_SIZE(sdm845_codec_freq_data),
.clks = {"core", "iface", "bus" },
.clks_num = 3,
.max_load = 2563200,
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 7a3feb5..b8aef19 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -35,12 +35,21 @@ struct reg_val {
u32 value;
 };
 
+struct codec_freq_data {
+   u32 pixfmt;
+   u32 session_type;
+   unsigned long vpp_freq;
+   unsigned long vsp_freq;
+};
+
 struct venus_resources {
u64 dma_mask;
const struct freq_tbl *freq_tbl;
unsigned int freq_tbl_size;
const struct reg_val *reg_tbl;
unsigned int reg_tbl_size;
+   const struct codec_freq_data *codec_freq_data;
+   unsigned int codec_freq_data_size;
const char * const clks[VIDC_CLKS_NUM_MAX];
unsigned int clks_num;
enum hfi_version hfi_version;
@@ -216,6 +225,12 @@ struct venus_buffer {
struct list_head ref_list;
 };
 
+struct clock_data {
+   u32 core_id;
+   unsigned long freq;
+   const struct codec_freq_data *codec_freq_data;
+};
+
 #define to_venus_buffer(ptr)   container_of(ptr, struct venus_buffer, vb)
 
 /**
@@ -275,6 +290,7 @@ struct venus_inst {
struct list_head list;
struct mutex lock;
struct venus_core *core;
+   struct clock_data clk_data;
struct list_head dpbbufs;
struct list_head internalbufs;
struct list_head registeredbufs;
diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 5cad601..7492373 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -715,6 +715,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, 
u32 usage)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
 
+int venus_helper_init_codec_freq_data(struct venus_inst *inst)
+{
+   const struct codec_freq_data *data;
+   unsigned int i, data_size;
+   u32 pixfmt;
+   int ret = 0;
+
+   if (!IS_V4(inst->core))
+   return 0;
+
+   data = inst->core->res->codec_freq_data;
+   data_size = inst->core->res->codec_freq_data_size;
+   pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
+   inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
+
+   for (i = 0; i < data_size; i++) {
+   if (data[i].pixfmt == pixfmt &&
+   data[i].session_type == inst->session_type) {
+   inst->clk_data.codec_freq_data = [i];
+   break;
+   }
+   }
+
+   if (!inst->clk_data.codec_freq_data)
+   ret = -EINVAL;
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(venus_helper_init_codec_freq_data);
+
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  unsigned int output_bufs,
  unsigned int output2_bufs)
diff --git a/drivers/me

[PATCH v6 3/4] media: venus: Update to bitrate based clock scaling

2019-07-22 Thread Aniket Masule
Introduced clock scaling using bitrate, previous
calculations consider only the cycles per mb.
Also, clock scaling is now triggered before every
buffer being queued to the device. This helps in
deciding precise clock cycles required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 33 +
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 2c976e4..edf403d 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -399,17 +399,26 @@ static int scale_clocks(struct venus_inst *inst)
return ret;
 }
 
-static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+static unsigned long calculate_inst_freq(struct venus_inst *inst,
+unsigned long filled_len)
 {
-   unsigned long vpp_freq = 0;
+   unsigned long vpp_freq = 0, vsp_freq = 0;
+   u64 fps = inst->fps;
u32 mbs_per_sec;
 
mbs_per_sec = load_per_instance(inst);
vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
/* 21 / 20 is overhead factor */
vpp_freq += vpp_freq / 20;
+   vsp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vsp_freq;
 
-   return vpp_freq;
+   /* 10 / 7 is overhead factor */
+   if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+   vsp_freq += (inst->controls.enc.bitrate * 10) / 7;
+   else
+   vsp_freq += ((fps * filled_len * 8) * 10) / 7;
+
+   return max(vpp_freq, vsp_freq);
 }
 
 static int scale_clocks_v4(struct venus_inst *inst)
@@ -417,13 +426,27 @@ static int scale_clocks_v4(struct venus_inst *inst)
struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
+   struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
struct clk *clk = core->clks[0];
struct device *dev = core->dev;
unsigned int i;
unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+   unsigned long filled_len = 0;
+   struct venus_buffer *buf, *n;
+   struct vb2_buffer *vb;
int ret;
 
-   freq = calculate_vpp_freq(inst);
+   mutex_lock(>lock);
+   v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
+   vb = >vb.vb2_buf;
+   filled_len = max(filled_len, vb2_get_plane_payload(vb, 0));
+   }
+   mutex_unlock(>lock);
+
+   if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len)
+   return 0;
+
+   freq = calculate_inst_freq(inst, filled_len);
 
if (freq > table[0].freq)
dev_warn(dev, "HW is overloaded, needed: %lu max: %lu\n",
@@ -1093,6 +1116,8 @@ void venus_helper_vb2_buf_queue(struct vb2_buffer *vb)
if (ret)
goto unlock;
 
+   load_scale_clocks(inst);
+
ret = session_process_buf(inst, vbuf);
if (ret)
return_buf_error(inst, vbuf);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v6 4/4] media: venus: Update core selection

2019-07-22 Thread Aniket Masule
Present core assignment is static. Introduced load balancing
across the cores. Load on each core is calculated and core
with minimum load is assigned to given instance.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c| 69 +++---
 drivers/media/platform/qcom/venus/helpers.h|  2 +-
 drivers/media/platform/qcom/venus/hfi_helper.h |  1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |  5 ++
 drivers/media/platform/qcom/venus/vdec.c   |  2 +-
 drivers/media/platform/qcom/venus/venc.c   |  2 +-
 6 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index edf403d..3b6cbbf 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -26,6 +26,7 @@
 #include "helpers.h"
 #include "hfi_helper.h"
 #include "hfi_venus_io.h"
+#include "hfi_parser.h"
 
 struct intbuf {
struct list_head list;
@@ -331,6 +332,24 @@ static u32 load_per_instance(struct venus_inst *inst)
return mbs * inst->fps;
 }
 
+static u32 load_per_core(struct venus_core *core, u32 core_id)
+{
+   struct venus_inst *inst = NULL;
+   u32 mbs_per_sec = 0, load = 0;
+
+   mutex_lock(>lock);
+   list_for_each_entry(inst, >instances, list) {
+   if (inst->clk_data.core_id != core_id)
+   continue;
+
+   mbs_per_sec = load_per_instance(inst);
+   load += mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+   }
+   mutex_unlock(>lock);
+
+   return load;
+}
+
 static u32 load_per_type(struct venus_core *core, u32 session_type)
 {
struct venus_inst *inst = NULL;
@@ -505,6 +524,16 @@ static int load_scale_clocks(struct venus_inst *inst)
return scale_clocks(inst);
 }
 
+int set_core_usage(struct venus_inst *inst, u32 usage)
+{
+   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
+   struct hfi_videocores_usage_type cu;
+
+   cu.video_core_enable_mask = usage;
+
+   return hfi_session_set_property(inst, ptype, );
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -808,19 +837,47 @@ int venus_helper_set_work_mode(struct venus_inst *inst, 
u32 mode)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_work_mode);
 
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
+int venus_helper_set_core(struct venus_inst *inst)
 {
-   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
-   struct hfi_videocores_usage_type cu;
+   struct venus_core *core = inst->core;
+   u32 min_core_id = 0, core1_load = 0, core2_load = 0;
+   unsigned long min_load, max_freq, cur_inst_load;
+   u32 cores_max;
+   int ret;
 
if (!IS_V4(inst->core))
return 0;
 
-   cu.video_core_enable_mask = usage;
+   core1_load = load_per_core(core, VIDC_CORE_ID_1);
+   core2_load = load_per_core(core, VIDC_CORE_ID_2);
+   min_core_id = core1_load < core2_load ? VIDC_CORE_ID_1 : VIDC_CORE_ID_2;
+   min_load = min(core1_load, core2_load);
+   cores_max = core_num_max(inst);
 
-   return hfi_session_set_property(inst, ptype, );
+   if (cores_max < VIDC_CORE_ID_2) {
+   min_core_id = VIDC_CORE_ID_1;
+   min_load = core1_load;
+   }
+
+   cur_inst_load = load_per_instance(inst) *
+   inst->clk_data.codec_freq_data->vpp_freq;
+   max_freq = core->res->freq_tbl[0].freq;
+
+   if ((cur_inst_load + min_load) > max_freq) {
+   dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n",
+cur_inst_load, max_freq);
+   return -EINVAL;
+   }
+
+   ret = set_core_usage(inst, min_core_id);
+   if (ret)
+   return ret;
+
+   inst->clk_data.core_id = min_core_id;
+
+   return 0;
 }
-EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
+EXPORT_SYMBOL_GPL(venus_helper_set_core);
 
 int venus_helper_init_codec_freq_data(struct venus_inst *inst)
 {
diff --git a/drivers/media/platform/qcom/venus/helpers.h 
b/drivers/media/platform/qcom/venus/helpers.h
index 2c13245..1034111 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -42,7 +42,7 @@ int venus_helper_set_output_resolution(struct venus_inst 
*inst,
   u32 buftype);
 int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
 int venus_helper_init_codec_freq_data(struct venus_inst *inst);
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
+int venus_helper_set_core(struct venus_inst *inst);
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  

[PATCH v6 0/4] media: venus: Update clock scaling and core selection

2019-07-22 Thread Aniket Masule
In this patch series, clock scaling and core selection methods are
updated. Current clock scaling and core selection methods are same
for vpu4 and previous versions. Introducing load calculations using
vpp cycles, which indicates the cycles required by video hardware to
process each macroblock. Also adding vsp cycles, cycles required by
stream processor. Clock scaling is now done more precisely using vpp
and vsp cycles. Instance is assigned to core with minimum load, instead
of static assignment.

Changes since v5:
 - Corrected load_per_core calculations.

Changes since v4:
 - Added call to load_scale_clocks from venus_helper_vb2_buf_queue.
 - Modified check to match core_id in core_selection.

Changes since v3:
 - vsp_cycles and vpp_cycles are now unsigned long.
 - Core number counting aligned with VIDC_CORE_ID_.
 - Aligned hardware overload handling of scale_clocks_v4 with scale_clocks.
 - Added bitrate based clock scaling patch in this patch series.
 - Instance state check is now moved from scale_clocks to load_scale_clocks.

Aniket Masule (4):
  media: venus: Add codec data table
  media: venus: Update clock scaling
  media: venus: Update to bitrate based clock scaling
  media: venus: Update core selection

 drivers/media/platform/qcom/venus/core.c   |  13 ++
 drivers/media/platform/qcom/venus/core.h   |  16 ++
 drivers/media/platform/qcom/venus/helpers.c| 215 +++--
 drivers/media/platform/qcom/venus/helpers.h|   3 +-
 drivers/media/platform/qcom/venus/hfi_helper.h |   1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |   5 +
 drivers/media/platform/qcom/venus/vdec.c   |   6 +-
 drivers/media/platform/qcom/venus/venc.c   |   6 +-
 8 files changed, 252 insertions(+), 13 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v5 3/4] media: venus: Update to bitrate based clock scaling

2019-07-15 Thread Aniket Masule
Introduced clock scaling using bitrate, previous
calculations consider only the cycles per mb.
Also, clock scaling is now triggered before every
buffer being queued to the device. This helps in
deciding precise clock cycles required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 33 +
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 2c976e4..edf403d 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -399,17 +399,26 @@ static int scale_clocks(struct venus_inst *inst)
return ret;
 }
 
-static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+static unsigned long calculate_inst_freq(struct venus_inst *inst,
+unsigned long filled_len)
 {
-   unsigned long vpp_freq = 0;
+   unsigned long vpp_freq = 0, vsp_freq = 0;
+   u64 fps = inst->fps;
u32 mbs_per_sec;
 
mbs_per_sec = load_per_instance(inst);
vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
/* 21 / 20 is overhead factor */
vpp_freq += vpp_freq / 20;
+   vsp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vsp_freq;
 
-   return vpp_freq;
+   /* 10 / 7 is overhead factor */
+   if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+   vsp_freq += (inst->controls.enc.bitrate * 10) / 7;
+   else
+   vsp_freq += ((fps * filled_len * 8) * 10) / 7;
+
+   return max(vpp_freq, vsp_freq);
 }
 
 static int scale_clocks_v4(struct venus_inst *inst)
@@ -417,13 +426,27 @@ static int scale_clocks_v4(struct venus_inst *inst)
struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
+   struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
struct clk *clk = core->clks[0];
struct device *dev = core->dev;
unsigned int i;
unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+   unsigned long filled_len = 0;
+   struct venus_buffer *buf, *n;
+   struct vb2_buffer *vb;
int ret;
 
-   freq = calculate_vpp_freq(inst);
+   mutex_lock(>lock);
+   v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
+   vb = >vb.vb2_buf;
+   filled_len = max(filled_len, vb2_get_plane_payload(vb, 0));
+   }
+   mutex_unlock(>lock);
+
+   if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len)
+   return 0;
+
+   freq = calculate_inst_freq(inst, filled_len);
 
if (freq > table[0].freq)
dev_warn(dev, "HW is overloaded, needed: %lu max: %lu\n",
@@ -1093,6 +1116,8 @@ void venus_helper_vb2_buf_queue(struct vb2_buffer *vb)
if (ret)
goto unlock;
 
+   load_scale_clocks(inst);
+
ret = session_process_buf(inst, vbuf);
if (ret)
return_buf_error(inst, vbuf);
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v5 4/4] media: venus: Update core selection

2019-07-15 Thread Aniket Masule
Present core assignment is static. Introduced load balancing
across the cores. Load on each core is calculated and core
with minimum load is assigned to given instance.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c| 69 +++---
 drivers/media/platform/qcom/venus/helpers.h|  2 +-
 drivers/media/platform/qcom/venus/hfi_helper.h |  1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |  5 ++
 drivers/media/platform/qcom/venus/vdec.c   |  2 +-
 drivers/media/platform/qcom/venus/venc.c   |  2 +-
 6 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index edf403d..d479793 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -26,6 +26,7 @@
 #include "helpers.h"
 #include "hfi_helper.h"
 #include "hfi_venus_io.h"
+#include "hfi_parser.h"
 
 struct intbuf {
struct list_head list;
@@ -331,6 +332,24 @@ static u32 load_per_instance(struct venus_inst *inst)
return mbs * inst->fps;
 }
 
+static u32 load_per_core(struct venus_core *core, u32 core_id)
+{
+   struct venus_inst *inst = NULL;
+   u32 mbs_per_sec = 0, load = 0;
+
+   mutex_lock(>lock);
+   list_for_each_entry(inst, >instances, list) {
+   if (inst->clk_data.core_id != core_id)
+   continue;
+
+   mbs_per_sec = load_per_instance(inst);
+   load = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+   }
+   mutex_unlock(>lock);
+
+   return load;
+}
+
 static u32 load_per_type(struct venus_core *core, u32 session_type)
 {
struct venus_inst *inst = NULL;
@@ -505,6 +524,16 @@ static int load_scale_clocks(struct venus_inst *inst)
return scale_clocks(inst);
 }
 
+int set_core_usage(struct venus_inst *inst, u32 usage)
+{
+   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
+   struct hfi_videocores_usage_type cu;
+
+   cu.video_core_enable_mask = usage;
+
+   return hfi_session_set_property(inst, ptype, );
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -808,19 +837,47 @@ int venus_helper_set_work_mode(struct venus_inst *inst, 
u32 mode)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_work_mode);
 
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
+int venus_helper_set_core(struct venus_inst *inst)
 {
-   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
-   struct hfi_videocores_usage_type cu;
+   struct venus_core *core = inst->core;
+   u32 min_core_id = 0, core1_load = 0, core2_load = 0;
+   unsigned long min_load, max_freq, cur_inst_load;
+   u32 cores_max;
+   int ret;
 
if (!IS_V4(inst->core))
return 0;
 
-   cu.video_core_enable_mask = usage;
+   core1_load = load_per_core(core, VIDC_CORE_ID_1);
+   core2_load = load_per_core(core, VIDC_CORE_ID_2);
+   min_core_id = core1_load < core2_load ? VIDC_CORE_ID_1 : VIDC_CORE_ID_2;
+   min_load = min(core1_load, core2_load);
+   cores_max = core_num_max(inst);
 
-   return hfi_session_set_property(inst, ptype, );
+   if (cores_max < VIDC_CORE_ID_2) {
+   min_core_id = VIDC_CORE_ID_1;
+   min_load = core1_load;
+   }
+
+   cur_inst_load = load_per_instance(inst) *
+   inst->clk_data.codec_freq_data->vpp_freq;
+   max_freq = core->res->freq_tbl[0].freq;
+
+   if ((cur_inst_load + min_load)  > max_freq) {
+   dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n",
+cur_inst_load, max_freq);
+   return -EINVAL;
+   }
+
+   ret = set_core_usage(inst, min_core_id);
+   if (ret)
+   return ret;
+
+   inst->clk_data.core_id = min_core_id;
+
+   return 0;
 }
-EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
+EXPORT_SYMBOL_GPL(venus_helper_set_core);
 
 int venus_helper_init_codec_freq_data(struct venus_inst *inst)
 {
diff --git a/drivers/media/platform/qcom/venus/helpers.h 
b/drivers/media/platform/qcom/venus/helpers.h
index 2c13245..1034111 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -42,7 +42,7 @@ int venus_helper_set_output_resolution(struct venus_inst 
*inst,
   u32 buftype);
 int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
 int venus_helper_init_codec_freq_data(struct venus_inst *inst);
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
+int venus_helper_set_core(struct venus_inst *inst);
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  

[PATCH v5 2/4] media: venus: Update clock scaling

2019-07-15 Thread Aniket Masule
Current clock scaling calculations are same for vpu4 and
previous versions. For vpu4, Clock scaling calculations
are updated with cycles/mb. This helps in getting precise
clock required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 91 +++--
 1 file changed, 87 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 7492373..2c976e4 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core, u32 
session_type)
return mbs_per_sec;
 }
 
-static int load_scale_clocks(struct venus_core *core)
+static int scale_clocks(struct venus_inst *inst)
 {
+   struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
@@ -398,6 +399,89 @@ static int load_scale_clocks(struct venus_core *core)
return ret;
 }
 
+static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+{
+   unsigned long vpp_freq = 0;
+   u32 mbs_per_sec;
+
+   mbs_per_sec = load_per_instance(inst);
+   vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+   /* 21 / 20 is overhead factor */
+   vpp_freq += vpp_freq / 20;
+
+   return vpp_freq;
+}
+
+static int scale_clocks_v4(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
+   const struct freq_tbl *table = core->res->freq_tbl;
+   unsigned int num_rows = core->res->freq_tbl_size;
+   struct clk *clk = core->clks[0];
+   struct device *dev = core->dev;
+   unsigned int i;
+   unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+   int ret;
+
+   freq = calculate_vpp_freq(inst);
+
+   if (freq > table[0].freq)
+   dev_warn(dev, "HW is overloaded, needed: %lu max: %lu\n",
+freq, table[0].freq);
+
+   for (i = 0; i < num_rows; i++) {
+   if (freq > table[i].freq)
+   break;
+   freq = table[i].freq;
+   }
+
+   inst->clk_data.freq = freq;
+
+   mutex_lock(&core->lock);
+   list_for_each_entry(inst, &core->instances, list) {
+   if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+   freq_core1 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+   freq_core2 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+   freq_core1 += inst->clk_data.freq;
+   freq_core2 += inst->clk_data.freq;
+   }
+   }
+   mutex_unlock(&core->lock);
+
+   freq = max(freq_core1, freq_core2);
+
+   ret = clk_set_rate(clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core0_clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core1_clk, freq);
+   if (ret)
+   goto err;
+
+   return 0;
+
+err:
+   dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
+   return ret;
+}
+
+static int load_scale_clocks(struct venus_inst *inst)
+{
+   if (IS_V4(inst->core))
+   return scale_clocks_v4(inst);
+
+   if (inst->state == INST_START)
+   return 0;
+
+   return scale_clocks(inst);
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -1053,7 +1137,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
 
venus_helper_free_dpb_bufs(inst);
 
-   load_scale_clocks(core);
+   load_scale_clocks(inst);
INIT_LIST_HEAD(&inst->registeredbufs);
}
 
@@ -1070,7 +1154,6 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
 
 int venus_helper_vb2_start_streaming(struct venus_inst *inst)
 {
-   struct venus_core *core = inst->core;
int ret;
 
ret = intbufs_alloc(inst);
@@ -1081,7 +1164,7 @@ int venus_helper_vb2_start_streaming(struct venus_inst 
*inst)
if (ret)
goto err_bufs_free;
 
-   load_scale_clocks(core);
+   load_scale_clocks(inst);
 
ret = hfi_session_load_res(inst);
if (ret)
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v5 1/4] media: venus: Add codec data table

2019-07-15 Thread Aniket Masule
Add vpp cycles for different types of codec.
It indicates the cycles required by video hardware
to process each macroblock. Add vsp cycles, cycles
required by stream processor. Initialize the codec
data with core resources.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/core.c| 13 +
 drivers/media/platform/qcom/venus/core.h| 16 +++
 drivers/media/platform/qcom/venus/helpers.c | 30 +
 drivers/media/platform/qcom/venus/helpers.h |  1 +
 drivers/media/platform/qcom/venus/vdec.c|  4 
 drivers/media/platform/qcom/venus/venc.c|  4 
 6 files changed, 68 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index 7393667..ad6bb74 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -473,9 +473,22 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
{  244800, 1 }, /* 1920x1080@30 */
 };
 
+static struct codec_freq_data sdm845_codec_freq_data[] =  {
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 10 },
+};
+
 static const struct venus_resources sdm845_res = {
.freq_tbl = sdm845_freq_table,
.freq_tbl_size = ARRAY_SIZE(sdm845_freq_table),
+   .codec_freq_data = sdm845_codec_freq_data,
+   .codec_freq_data_size = ARRAY_SIZE(sdm845_codec_freq_data),
.clks = {"core", "iface", "bus" },
.clks_num = 3,
.max_load = 2563200,
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 7a3feb5..b8aef19 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -35,12 +35,21 @@ struct reg_val {
u32 value;
 };
 
+struct codec_freq_data {
+   u32 pixfmt;
+   u32 session_type;
+   unsigned long vpp_freq;
+   unsigned long vsp_freq;
+};
+
 struct venus_resources {
u64 dma_mask;
const struct freq_tbl *freq_tbl;
unsigned int freq_tbl_size;
const struct reg_val *reg_tbl;
unsigned int reg_tbl_size;
+   const struct codec_freq_data *codec_freq_data;
+   unsigned int codec_freq_data_size;
const char * const clks[VIDC_CLKS_NUM_MAX];
unsigned int clks_num;
enum hfi_version hfi_version;
@@ -216,6 +225,12 @@ struct venus_buffer {
struct list_head ref_list;
 };
 
+struct clock_data {
+   u32 core_id;
+   unsigned long freq;
+   const struct codec_freq_data *codec_freq_data;
+};
+
 #define to_venus_buffer(ptr)   container_of(ptr, struct venus_buffer, vb)
 
 /**
@@ -275,6 +290,7 @@ struct venus_inst {
struct list_head list;
struct mutex lock;
struct venus_core *core;
+   struct clock_data clk_data;
struct list_head dpbbufs;
struct list_head internalbufs;
struct list_head registeredbufs;
diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 5cad601..7492373 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -715,6 +715,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, 
u32 usage)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
 
+int venus_helper_init_codec_freq_data(struct venus_inst *inst)
+{
+   const struct codec_freq_data *data;
+   unsigned int i, data_size;
+   u32 pixfmt;
+   int ret = 0;
+
+   if (!IS_V4(inst->core))
+   return 0;
+
+   data = inst->core->res->codec_freq_data;
+   data_size = inst->core->res->codec_freq_data_size;
+   pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
+   inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
+
+   for (i = 0; i < data_size; i++) {
+   if (data[i].pixfmt == pixfmt &&
+   data[i].session_type == inst->session_type) {
+   inst->clk_data.codec_freq_data = &data[i];
+   break;
+   }
+   }
+
+   if (!inst->clk_data.codec_freq_data)
+   ret = -EINVAL;
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(venus_helper_init_codec_freq_data);
+
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  unsigned int output_bufs,
  unsigned int output2_bufs)
diff --git a/drivers/me

[PATCH v5 0/4] media: venus: Update clock scaling and core selection

2019-07-15 Thread Aniket Masule
In this patch series, clock scaling and core selection methods are
updated. Current clock scaling and core selection methods are same
for vpu4 and previous versions. Introducing load calculations using
vpp cycles, which indicates the cycles required by video hardware to
process each macroblock. Also adding vsp cycles, cycles required by
stream processor. Clock scaling is now done more precisely using vpp
and vsp cycles. Instance is assigned to core with minimum load, instead
of static assignment.

Changes since v4:
 - Added call to load_scale_clocks from venus_helper_vb2_buf_queue.
 - Modified check to match core_id in core_selection.

Changes since v3:
 - vsp_cycles and vpp_cycles are now unsigned long.
 - Core number counting aligned with VIDC_CORE_ID_.
 - Aligned hardware overload handling of scale_clocks_v4 with scale_clocks.
 - Added bitrate based clock scaling patch in this patch series.
 - Instance state check is now moved from scale_clocks to load_scale_clocks.

Aniket Masule (4):
  media: venus: Add codec data table
  media: venus: Update clock scaling
  media: venus: Update to bitrate based clock scaling
  media: venus: Update core selection

 drivers/media/platform/qcom/venus/core.c   |  13 ++
 drivers/media/platform/qcom/venus/core.h   |  16 ++
 drivers/media/platform/qcom/venus/helpers.c| 215 +++--
 drivers/media/platform/qcom/venus/helpers.h|   3 +-
 drivers/media/platform/qcom/venus/hfi_helper.h |   1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |   5 +
 drivers/media/platform/qcom/venus/vdec.c   |   6 +-
 drivers/media/platform/qcom/venus/venc.c   |   6 +-
 8 files changed, 252 insertions(+), 13 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v4 2/4] media: venus: Update clock scaling

2019-07-02 Thread Aniket Masule
Current clock scaling calculations are same for vpu4 and
previous versions. For vpu4, Clock scaling calculations
are updated with cycles/mb. This helps in getting precise
clock required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 91 +++--
 1 file changed, 87 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 7492373..2c976e4 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core, u32 
session_type)
return mbs_per_sec;
 }
 
-static int load_scale_clocks(struct venus_core *core)
+static int scale_clocks(struct venus_inst *inst)
 {
+   struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
@@ -398,6 +399,89 @@ static int load_scale_clocks(struct venus_core *core)
return ret;
 }
 
+static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+{
+   unsigned long vpp_freq = 0;
+   u32 mbs_per_sec;
+
+   mbs_per_sec = load_per_instance(inst);
+   vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+   /* 21 / 20 is overhead factor */
+   vpp_freq += vpp_freq / 20;
+
+   return vpp_freq;
+}
+
+static int scale_clocks_v4(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
+   const struct freq_tbl *table = core->res->freq_tbl;
+   unsigned int num_rows = core->res->freq_tbl_size;
+   struct clk *clk = core->clks[0];
+   struct device *dev = core->dev;
+   unsigned int i;
+   unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+   int ret;
+
+   freq = calculate_vpp_freq(inst);
+
+   if (freq > table[0].freq)
+   dev_warn(dev, "HW is overloaded, needed: %lu max: %lu\n",
+freq, table[0].freq);
+
+   for (i = 0; i < num_rows; i++) {
+   if (freq > table[i].freq)
+   break;
+   freq = table[i].freq;
+   }
+
+   inst->clk_data.freq = freq;
+
+   mutex_lock(&core->lock);
+   list_for_each_entry(inst, &core->instances, list) {
+   if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+   freq_core1 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+   freq_core2 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+   freq_core1 += inst->clk_data.freq;
+   freq_core2 += inst->clk_data.freq;
+   }
+   }
+   mutex_unlock(&core->lock);
+
+   freq = max(freq_core1, freq_core2);
+
+   ret = clk_set_rate(clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core0_clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core1_clk, freq);
+   if (ret)
+   goto err;
+
+   return 0;
+
+err:
+   dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
+   return ret;
+}
+
+static int load_scale_clocks(struct venus_inst *inst)
+{
+   if (IS_V4(inst->core))
+   return scale_clocks_v4(inst);
+
+   if (inst->state == INST_START)
+   return 0;
+
+   return scale_clocks(inst);
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -1053,7 +1137,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
 
venus_helper_free_dpb_bufs(inst);
 
-   load_scale_clocks(core);
+   load_scale_clocks(inst);
INIT_LIST_HEAD(&inst->registeredbufs);
}
 
@@ -1070,7 +1154,6 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
 
 int venus_helper_vb2_start_streaming(struct venus_inst *inst)
 {
-   struct venus_core *core = inst->core;
int ret;
 
ret = intbufs_alloc(inst);
@@ -1081,7 +1164,7 @@ int venus_helper_vb2_start_streaming(struct venus_inst 
*inst)
if (ret)
goto err_bufs_free;
 
-   load_scale_clocks(core);
+   load_scale_clocks(inst);
 
ret = hfi_session_load_res(inst);
if (ret)
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v4 4/4] media: venus: Update core selection

2019-07-02 Thread Aniket Masule
Present core assignment is static. Introduced load balancing
across the cores. Load on each core is calculated and core
with minimum load is assigned to given instance.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c| 69 +++---
 drivers/media/platform/qcom/venus/helpers.h|  2 +-
 drivers/media/platform/qcom/venus/hfi_helper.h |  1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |  5 ++
 drivers/media/platform/qcom/venus/vdec.c   |  2 +-
 drivers/media/platform/qcom/venus/venc.c   |  2 +-
 6 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 5726d86..321e9f7 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -26,6 +26,7 @@
 #include "helpers.h"
 #include "hfi_helper.h"
 #include "hfi_venus_io.h"
+#include "hfi_parser.h"
 
 struct intbuf {
struct list_head list;
@@ -331,6 +332,24 @@ static u32 load_per_instance(struct venus_inst *inst)
return mbs * inst->fps;
 }
 
+static u32 load_per_core(struct venus_core *core, u32 core_id)
+{
+   struct venus_inst *inst = NULL;
+   u32 mbs_per_sec = 0, load = 0;
+
+   mutex_lock(&core->lock);
+   list_for_each_entry(inst, &core->instances, list) {
+   if (!(inst->clk_data.core_id == core_id))
+   continue;
+
+   mbs_per_sec = load_per_instance(inst);
+   load = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+   }
+   mutex_unlock(&core->lock);
+
+   return load;
+}
+
 static u32 load_per_type(struct venus_core *core, u32 session_type)
 {
struct venus_inst *inst = NULL;
@@ -505,6 +524,16 @@ static int load_scale_clocks(struct venus_inst *inst)
return scale_clocks(inst);
 }
 
+int set_core_usage(struct venus_inst *inst, u32 usage)
+{
+   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
+   struct hfi_videocores_usage_type cu;
+
+   cu.video_core_enable_mask = usage;
+
+   return hfi_session_set_property(inst, ptype, &cu);
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -808,19 +837,47 @@ int venus_helper_set_work_mode(struct venus_inst *inst, 
u32 mode)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_work_mode);
 
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
+int venus_helper_set_core(struct venus_inst *inst)
 {
-   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
-   struct hfi_videocores_usage_type cu;
+   struct venus_core *core = inst->core;
+   u32 min_core_id = 0, core1_load = 0, core2_load = 0;
+   unsigned long min_load, max_freq, cur_inst_load;
+   u32 cores_max;
+   int ret;
 
if (!IS_V4(inst->core))
return 0;
 
-   cu.video_core_enable_mask = usage;
+   core1_load = load_per_core(core, VIDC_CORE_ID_1);
+   core2_load = load_per_core(core, VIDC_CORE_ID_2);
+   min_core_id = core1_load < core2_load ? VIDC_CORE_ID_1 : VIDC_CORE_ID_2;
+   min_load = min(core1_load, core2_load);
+   cores_max = core_num_max(inst);
 
-   return hfi_session_set_property(inst, ptype, );
+   if (cores_max < VIDC_CORE_ID_2) {
+   min_core_id = VIDC_CORE_ID_1;
+   min_load = core1_load;
+   }
+
+   cur_inst_load = load_per_instance(inst) *
+   inst->clk_data.codec_freq_data->vpp_freq;
+   max_freq = core->res->freq_tbl[0].freq;
+
+   if ((cur_inst_load + min_load)  > max_freq) {
+   dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n",
+cur_inst_load, max_freq);
+   return -EINVAL;
+   }
+
+   ret = set_core_usage(inst, min_core_id);
+   if (ret)
+   return ret;
+
+   inst->clk_data.core_id = min_core_id;
+
+   return 0;
 }
-EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
+EXPORT_SYMBOL_GPL(venus_helper_set_core);
 
 int venus_helper_init_codec_freq_data(struct venus_inst *inst)
 {
diff --git a/drivers/media/platform/qcom/venus/helpers.h 
b/drivers/media/platform/qcom/venus/helpers.h
index 2c13245..1034111 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -42,7 +42,7 @@ int venus_helper_set_output_resolution(struct venus_inst 
*inst,
   u32 buftype);
 int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
 int venus_helper_init_codec_freq_data(struct venus_inst *inst);
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
+int venus_helper_set_core(struct venus_inst *inst);
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  

[PATCH v4 3/4] media: venus: Update to bitrate based clock scaling

2019-07-02 Thread Aniket Masule
Introduced clock scaling using bitrate, previous
calculations consider only the cycles per mb.
Also, clock scaling is now triggered before every
buffer being queued to the device. This helps in
deciding precise clock cycles required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 31 +
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 2c976e4..5726d86 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -399,17 +399,26 @@ static int scale_clocks(struct venus_inst *inst)
return ret;
 }
 
-static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+static unsigned long calculate_inst_freq(struct venus_inst *inst,
+unsigned long filled_len)
 {
-   unsigned long vpp_freq = 0;
+   unsigned long vpp_freq = 0, vsp_freq = 0;
+   u64 fps = inst->fps;
u32 mbs_per_sec;
 
mbs_per_sec = load_per_instance(inst);
vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
/* 21 / 20 is overhead factor */
vpp_freq += vpp_freq / 20;
+   vsp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vsp_freq;
 
-   return vpp_freq;
+   /* 10 / 7 is overhead factor */
+   if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+   vsp_freq += (inst->controls.enc.bitrate * 10) / 7;
+   else
+   vsp_freq += ((fps * filled_len * 8) * 10) / 7;
+
+   return max(vpp_freq, vsp_freq);
 }
 
 static int scale_clocks_v4(struct venus_inst *inst)
@@ -417,13 +426,27 @@ static int scale_clocks_v4(struct venus_inst *inst)
struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
+   struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
struct clk *clk = core->clks[0];
struct device *dev = core->dev;
unsigned int i;
unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0;
+   unsigned long filled_len = 0;
+   struct venus_buffer *buf, *n;
+   struct vb2_buffer *vb;
int ret;
 
-   freq = calculate_vpp_freq(inst);
+   mutex_lock(&inst->lock);
+   v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
+   vb = &buf->vb.vb2_buf;
+   filled_len = max(filled_len, vb2_get_plane_payload(vb, 0));
+   }
+   mutex_unlock(&inst->lock);
+
+   if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len)
+   return 0;
+
+   freq = calculate_inst_freq(inst, filled_len);
 
if (freq > table[0].freq)
dev_warn(dev, "HW is overloaded, needed: %lu max: %lu\n",
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v4 0/4] media: venus: Update clock scaling and core selection

2019-07-02 Thread Aniket Masule
In this patch series, clock scaling and core selection methods are
updated. Current clock scaling and core selection methods are same
for vpu4 and previous versions. Introducing load calculations using
vpp cycles, which indicates the cycles required by video hardware to
process each macroblock. Also adding vsp cycles, cycles required by
stream processor. Clock scaling is now done more precisely using vpp
and vsp cycles. Instance is assigned to core with minimum load, instead
of static assignment.

Changes since v3:
 - vsp_cycles and vpp_cycles are now unsigned long.
 - Core number counting aligned with VIDC_CORE_ID_.
 - Aligned hardware overload handling of scale_clocks_v4 with scale_clocks.
 - Added bitrate based clock scaling patch in this patch series.
 - Instance state check is now moved from scale_clocks to load_scale_clocks.

Aniket Masule (4):
  media: venus: Add codec data table
  media: venus: Update clock scaling
  media: venus: Update to bitrate based clock scaling
  media: venus: Update core selection

 drivers/media/platform/qcom/venus/core.c   |  13 ++
 drivers/media/platform/qcom/venus/core.h   |  16 ++
 drivers/media/platform/qcom/venus/helpers.c| 213 +++--
 drivers/media/platform/qcom/venus/helpers.h|   3 +-
 drivers/media/platform/qcom/venus/hfi_helper.h |   1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |   5 +
 drivers/media/platform/qcom/venus/vdec.c   |   6 +-
 drivers/media/platform/qcom/venus/venc.c   |   6 +-
 8 files changed, 250 insertions(+), 13 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v4 1/4] media: venus: Add codec data table

2019-07-02 Thread Aniket Masule
Add vpp cycles for different types of codec.
It indicates the cycles required by video hardware
to process each macroblock. Add vsp cycles, cycles
required by stream processor. Initialize the codec
data with core resources.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/core.c| 13 +
 drivers/media/platform/qcom/venus/core.h| 16 +++
 drivers/media/platform/qcom/venus/helpers.c | 30 +
 drivers/media/platform/qcom/venus/helpers.h |  1 +
 drivers/media/platform/qcom/venus/vdec.c|  4 
 drivers/media/platform/qcom/venus/venc.c|  4 
 6 files changed, 68 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index 7393667..ad6bb74 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -473,9 +473,22 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
{  244800, 1 }, /* 1920x1080@30 */
 };
 
+static struct codec_freq_data sdm845_codec_freq_data[] =  {
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 10 },
+};
+
 static const struct venus_resources sdm845_res = {
.freq_tbl = sdm845_freq_table,
.freq_tbl_size = ARRAY_SIZE(sdm845_freq_table),
+   .codec_freq_data = sdm845_codec_freq_data,
+   .codec_freq_data_size = ARRAY_SIZE(sdm845_codec_freq_data),
.clks = {"core", "iface", "bus" },
.clks_num = 3,
.max_load = 2563200,
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 7a3feb5..b8aef19 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -35,12 +35,21 @@ struct reg_val {
u32 value;
 };
 
+struct codec_freq_data {
+   u32 pixfmt;
+   u32 session_type;
+   unsigned long vpp_freq;
+   unsigned long vsp_freq;
+};
+
 struct venus_resources {
u64 dma_mask;
const struct freq_tbl *freq_tbl;
unsigned int freq_tbl_size;
const struct reg_val *reg_tbl;
unsigned int reg_tbl_size;
+   const struct codec_freq_data *codec_freq_data;
+   unsigned int codec_freq_data_size;
const char * const clks[VIDC_CLKS_NUM_MAX];
unsigned int clks_num;
enum hfi_version hfi_version;
@@ -216,6 +225,12 @@ struct venus_buffer {
struct list_head ref_list;
 };
 
+struct clock_data {
+   u32 core_id;
+   unsigned long freq;
+   const struct codec_freq_data *codec_freq_data;
+};
+
 #define to_venus_buffer(ptr)   container_of(ptr, struct venus_buffer, vb)
 
 /**
@@ -275,6 +290,7 @@ struct venus_inst {
struct list_head list;
struct mutex lock;
struct venus_core *core;
+   struct clock_data clk_data;
struct list_head dpbbufs;
struct list_head internalbufs;
struct list_head registeredbufs;
diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 5cad601..7492373 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -715,6 +715,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, 
u32 usage)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
 
+int venus_helper_init_codec_freq_data(struct venus_inst *inst)
+{
+   const struct codec_freq_data *data;
+   unsigned int i, data_size;
+   u32 pixfmt;
+   int ret = 0;
+
+   if (!IS_V4(inst->core))
+   return 0;
+
+   data = inst->core->res->codec_freq_data;
+   data_size = inst->core->res->codec_freq_data_size;
+   pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
+   inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
+
+   for (i = 0; i < data_size; i++) {
+   if (data[i].pixfmt == pixfmt &&
+   data[i].session_type == inst->session_type) {
+   inst->clk_data.codec_freq_data = &data[i];
+   break;
+   }
+   }
+
+   if (!inst->clk_data.codec_freq_data)
+   ret = -EINVAL;
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(venus_helper_init_codec_freq_data);
+
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  unsigned int output_bufs,
  unsigned int output2_bufs)
diff --git a/drivers/me

[PATCH v3] arm64: dts: sdm845: Add video nodes

2019-07-02 Thread Aniket Masule
From: Malathi Gottam 

This adds video nodes to sdm845 based on the examples
in the bindings.

Signed-off-by: Malathi Gottam 
Co-developed-by: Aniket Masule 
Signed-off-by: Aniket Masule 
---
 arch/arm64/boot/dts/qcom/sdm845.dtsi | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index fcb9330..f3cd94f 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -1893,6 +1893,36 @@
};
};
 
+   video-codec@aa0 {
+   compatible = "qcom,sdm845-venus";
+   reg = <0 0x0aa0 0 0xff000>;
+   interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
+   power-domains = <&videocc VENUS_GDSC>;
+   clocks = <&videocc VIDEO_CC_VENUS_CTL_CORE_CLK>,
+<&videocc VIDEO_CC_VENUS_AHB_CLK>,
+<&videocc VIDEO_CC_VENUS_CTL_AXI_CLK>;
+   clock-names = "core", "iface", "bus";
+   iommus = <&apps_smmu 0x10a0 0x8>,
+<&apps_smmu 0x10b0 0x0>;
+   memory-region = <&venus_mem>;
+
+   video-core0 {
+   compatible = "venus-decoder";
+   clocks = <&videocc VIDEO_CC_VCODEC0_CORE_CLK>,
+<&videocc VIDEO_CC_VCODEC0_AXI_CLK>;
+   clock-names = "core", "bus";
+   power-domains = <&videocc VCODEC0_GDSC>;
+   };
+
+   video-core1 {
+   compatible = "venus-encoder";
+   clocks = <&videocc VIDEO_CC_VCODEC1_CORE_CLK>,
+<&videocc VIDEO_CC_VCODEC1_AXI_CLK>;
+   clock-names = "core", "bus";
+   power-domains = <&videocc VCODEC1_GDSC>;
+   };
+   };
+
videocc: clock-controller@ab0 {
compatible = "qcom,sdm845-videocc";
reg = <0 0x0ab0 0 0x1>;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2] arm64: dts: sdm845: Add video nodes

2019-06-27 Thread Aniket Masule
This adds video nodes to sdm845 based on the examples
in the bindings.

Changes since v1:
 - Corrected the Signed-off-by ordering.
 - Corrected the node position based on the address.

Aniket Masule (1):
  arm64: dts: sdm845: Add video nodes

 arch/arm64/boot/dts/qcom/sdm845.dtsi | 30 ++
 1 file changed, 30 insertions(+)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2] arm64: dts: sdm845: Add video nodes

2019-06-27 Thread Aniket Masule
From: Malathi Gottam 

This adds video nodes to sdm845 based on the examples
in the bindings.

Signed-off-by: Malathi Gottam 
Co-developed-by: Aniket Masule 
Signed-off-by: Aniket Masule 
---
 arch/arm64/boot/dts/qcom/sdm845.dtsi | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index fcb9330..94813a9 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -1893,6 +1893,36 @@
};
};
 
+   video-codec@aa0 {
+   compatible = "qcom,sdm845-venus";
+   reg = <0x0aa0 0xff000>;
+   interrupts = ;
+   power-domains = < VENUS_GDSC>;
+   clocks = < VIDEO_CC_VENUS_CTL_CORE_CLK>,
+< VIDEO_CC_VENUS_AHB_CLK>,
+< VIDEO_CC_VENUS_CTL_AXI_CLK>;
+   clock-names = "core", "iface", "bus";
+   iommus = <_smmu 0x10a0 0x8>,
+<_smmu 0x10b0 0x0>;
+   memory-region = <_region>;
+
+   video-core0 {
+   compatible = "venus-decoder";
+   clocks = < VIDEO_CC_VCODEC0_CORE_CLK>,
+< VIDEO_CC_VCODEC0_AXI_CLK>;
+   clock-names = "core", "bus";
+   power-domains = < VCODEC0_GDSC>;
+   };
+
+   video-core1 {
+   compatible = "venus-encoder";
+   clocks = < VIDEO_CC_VCODEC1_CORE_CLK>,
+< VIDEO_CC_VCODEC1_AXI_CLK>;
+   clock-names = "core", "bus";
+   power-domains = < VCODEC1_GDSC>;
+   };
+   };
+
videocc: clock-controller@ab0 {
compatible = "qcom,sdm845-videocc";
reg = <0 0x0ab0 0 0x1>;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH] arm64: dts: sdm845: Add video nodes

2019-06-26 Thread Aniket Masule
This adds video nodes to sdm845 based on the examples
in the bindings.

Signed-off-by: Malathi Gottam 
Signed-off-by: Aniket Masule 
---
 arch/arm64/boot/dts/qcom/sdm845.dtsi | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi 
b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index fcb9330..ff94cfa 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -2437,6 +2437,36 @@
;
iommus = <_smmu 0x0040 0x1>;
};
+
+   video-codec@aa0 {
+   compatible = "qcom,sdm845-venus";
+   reg = <0x0aa0 0xff000>;
+   interrupts = ;
+   power-domains = < VENUS_GDSC>;
+   clocks = < VIDEO_CC_VENUS_CTL_CORE_CLK>,
+< VIDEO_CC_VENUS_AHB_CLK>,
+< VIDEO_CC_VENUS_CTL_AXI_CLK>;
+   clock-names = "core", "iface", "bus";
+   iommus = <_smmu 0x10a0 0x8>,
+<_smmu 0x10b0 0x0>;
+   memory-region = <_region>;
+
+   video-core0 {
+   compatible = "venus-decoder";
+   clocks = < VIDEO_CC_VCODEC0_CORE_CLK>,
+< VIDEO_CC_VCODEC0_AXI_CLK>;
+   clock-names = "core", "bus";
+   power-domains = < VCODEC0_GDSC>;
+   };
+
+   video-core1 {
+   compatible = "venus-encoder";
+   clocks = < VIDEO_CC_VCODEC1_CORE_CLK>,
+< VIDEO_CC_VCODEC1_AXI_CLK>;
+   clock-names = "core", "bus";
+   power-domains = < VCODEC1_GDSC>;
+   };
+   };
};
 
thermal-zones {
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH] media: venus: Update to bitrate based clock scaling

2019-06-26 Thread Aniket Masule
Introduce clock scaling using bitrate; the current
calculations consider only the cycles per macroblock.
Also, clock scaling is now triggered before every
buffer is queued to the device. This helps in
deciding the precise clock cycles required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/core.c| 16 +--
 drivers/media/platform/qcom/venus/core.h|  1 +
 drivers/media/platform/qcom/venus/helpers.c | 43 +
 3 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index f1597d6..ad6bb74 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -474,14 +474,14 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
 };
 
 static struct codec_freq_data sdm845_codec_freq_data[] =  {
-   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675 },
-   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675 },
-   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675 },
-   { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200 },
-   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200 },
-   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200 },
-   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200 },
-   { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200 },
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 10 },
+   { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 10 },
+   { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 10 },
 };
 
 static const struct venus_resources sdm845_res = {
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 2ed6496..b964b7c 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -39,6 +39,7 @@ struct codec_freq_data {
u32 pixfmt;
u32 session_type;
unsigned int vpp_freq;
+   unsigned int vsp_freq;
 };
 
 struct venus_resources {
diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index ef35fd8..634778a 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -379,6 +379,9 @@ static int scale_clocks(struct venus_inst *inst)
unsigned int i;
int ret;
 
+   if (inst->state == INST_START)
+   return 0;
+
mbs_per_sec = load_per_type(core, VIDC_SESSION_TYPE_ENC) +
  load_per_type(core, VIDC_SESSION_TYPE_DEC);
 
@@ -418,17 +421,26 @@ static int scale_clocks(struct venus_inst *inst)
return ret;
 }
 
-static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+static unsigned long calculate_inst_freq(struct venus_inst *inst,
+unsigned long filled_len)
 {
-   unsigned long vpp_freq = 0;
+   unsigned long vpp_freq = 0, vsp_freq = 0;
+   u64 fps = inst->fps;
u32 mbs_per_sec;
 
mbs_per_sec = load_per_instance(inst);
vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
/* 21 / 20 is overhead factor */
vpp_freq += vpp_freq / 20;
+   vsp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vsp_freq;
+
+   /* 10 / 7 is overhead factor */
+   if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+   vsp_freq = (inst->controls.enc.bitrate * 10) / 7;
+   else
+   vsp_freq = ((fps * filled_len * 8) * 10) / 7;
 
-   return vpp_freq;
+   return max(vpp_freq, vsp_freq);
 }
 
 static int scale_clocks_v4(struct venus_inst *inst)
@@ -436,14 +448,30 @@ static int scale_clocks_v4(struct venus_inst *inst)
struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
-
+   struct v4l2_m2m_ctx *m2m_ctx = inst->m2m_ctx;
struct clk *clk = core->clks[0];
struct device *dev = core->dev;
+
unsigned int i;
unsigned long freq = 0, freq_core0 = 0, freq_core1 = 0;
+   unsigned long filled_len = 0;
+   struct venus_buffer *buf, *n;
+   struct vb2_buffer *vb;
int ret;
 
-   freq = calculate_vpp_freq(inst);
+   mutex_lock(>lock);
+   v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
+   vb = >vb.vb2_buf;
+   filled_len = max(filled_len, vb2_get_plane_payload(vb, 0));
+   }
+   mutex_unlock(>lock);
+
+   if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len) {
+   dev_dbg(dev, 

[PATCH] media: venus: Update to bitrate based clock scaling

2019-06-26 Thread Aniket Masule
This patch introduces bitrate-based clock scaling. Also, clock scaling is now
triggered before each buffer is queued to the device. This checks the frequency
requirement throughout the session and updates the clock with the correct
frequency only if the requirement has changed.

Aniket Masule (1):
  media: venus: Update to bitrate based clock scaling

 drivers/media/platform/qcom/venus/core.c| 16 +--
 drivers/media/platform/qcom/venus/core.h|  1 +
 drivers/media/platform/qcom/venus/helpers.c | 43 +
 3 files changed, 47 insertions(+), 13 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v3 3/4] media: venus: Add interface for load per core

2019-06-25 Thread Aniket Masule
Add an interface to calculate load per core. Also,
add an interface to get maximum cores available with
video. This interface is preparation for updating core
selection.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c| 19 +++
 drivers/media/platform/qcom/venus/hfi_helper.h |  1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |  5 +
 3 files changed, 25 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index e1a0247..b79e83a 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -26,6 +26,7 @@
 #include "helpers.h"
 #include "hfi_helper.h"
 #include "hfi_venus_io.h"
+#include "hfi_parser.h"
 
 struct intbuf {
struct list_head list;
@@ -331,6 +332,24 @@ static u32 load_per_instance(struct venus_inst *inst)
return mbs * inst->fps;
 }
 
+static u32 load_per_core(struct venus_core *core, u32 core_id)
+{
+   struct venus_inst *inst = NULL;
+   u32 mbs_per_sec = 0, load = 0;
+
+   mutex_lock(&core->lock);
+   list_for_each_entry(inst, &core->instances, list) {
+   if (!(inst->clk_data.core_id == core_id))
+   continue;
+
+   mbs_per_sec = load_per_instance(inst);
+   load = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+   }
+   mutex_unlock(&core->lock);
+
+   return load;
+}
+
 static u32 load_per_type(struct venus_core *core, u32 session_type)
 {
struct venus_inst *inst = NULL;
diff --git a/drivers/media/platform/qcom/venus/hfi_helper.h 
b/drivers/media/platform/qcom/venus/hfi_helper.h
index 34ea503..f3d1018 100644
--- a/drivers/media/platform/qcom/venus/hfi_helper.h
+++ b/drivers/media/platform/qcom/venus/hfi_helper.h
@@ -559,6 +559,7 @@ struct hfi_bitrate {
 #define HFI_CAPABILITY_LCU_SIZE0x14
 #define HFI_CAPABILITY_HIER_P_HYBRID_NUM_ENH_LAYERS0x15
 #define HFI_CAPABILITY_MBS_PER_SECOND_POWERSAVE0x16
+#define HFI_CAPABILITY_MAX_VIDEOCORES  0x2B
 
 struct hfi_capability {
u32 capability_type;
diff --git a/drivers/media/platform/qcom/venus/hfi_parser.h 
b/drivers/media/platform/qcom/venus/hfi_parser.h
index 3e931c7..264e6dd 100644
--- a/drivers/media/platform/qcom/venus/hfi_parser.h
+++ b/drivers/media/platform/qcom/venus/hfi_parser.h
@@ -107,4 +107,9 @@ static inline u32 frate_step(struct venus_inst *inst)
return cap_step(inst, HFI_CAPABILITY_FRAMERATE);
 }
 
+static inline u32 core_num_max(struct venus_inst *inst)
+{
+   return cap_max(inst, HFI_CAPABILITY_MAX_VIDEOCORES);
+}
+
 #endif
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v3 1/4] media: venus: Add codec data table

2019-06-25 Thread Aniket Masule
Add vpp cycles for different types of codec.
It indicates the cycles required by video hardware
to process each macroblock. Initialize the codec
data with core resources.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/core.c| 13 +
 drivers/media/platform/qcom/venus/core.h| 15 +++
 drivers/media/platform/qcom/venus/helpers.c | 30 +
 drivers/media/platform/qcom/venus/helpers.h |  1 +
 drivers/media/platform/qcom/venus/vdec.c|  4 
 drivers/media/platform/qcom/venus/venc.c|  4 
 6 files changed, 67 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index 7393667..f1597d6 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -473,9 +473,22 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
{  244800, 1 }, /* 1920x1080@30 */
 };
 
+static struct codec_freq_data sdm845_codec_freq_data[] =  {
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675 },
+   { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200 },
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200 },
+   { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200 },
+};
+
 static const struct venus_resources sdm845_res = {
.freq_tbl = sdm845_freq_table,
.freq_tbl_size = ARRAY_SIZE(sdm845_freq_table),
+   .codec_freq_data = sdm845_codec_freq_data,
+   .codec_freq_data_size = ARRAY_SIZE(sdm845_codec_freq_data),
.clks = {"core", "iface", "bus" },
.clks_num = 3,
.max_load = 2563200,
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 7a3feb5..2ed6496 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -35,12 +35,20 @@ struct reg_val {
u32 value;
 };
 
+struct codec_freq_data {
+   u32 pixfmt;
+   u32 session_type;
+   unsigned int vpp_freq;
+};
+
 struct venus_resources {
u64 dma_mask;
const struct freq_tbl *freq_tbl;
unsigned int freq_tbl_size;
const struct reg_val *reg_tbl;
unsigned int reg_tbl_size;
+   const struct codec_freq_data *codec_freq_data;
+   unsigned int codec_freq_data_size;
const char * const clks[VIDC_CLKS_NUM_MAX];
unsigned int clks_num;
enum hfi_version hfi_version;
@@ -216,6 +224,12 @@ struct venus_buffer {
struct list_head ref_list;
 };
 
+struct clock_data {
+   u32 core_id;
+   unsigned long freq;
+   struct codec_freq_data *codec_freq_data;
+};
+
 #define to_venus_buffer(ptr)   container_of(ptr, struct venus_buffer, vb)
 
 /**
@@ -275,6 +289,7 @@ struct venus_inst {
struct list_head list;
struct mutex lock;
struct venus_core *core;
+   struct clock_data clk_data;
struct list_head dpbbufs;
struct list_head internalbufs;
struct list_head registeredbufs;
diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 5cad601..f7f724b 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -715,6 +715,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, 
u32 usage)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
 
+int venus_helper_init_codec_data(struct venus_inst *inst)
+{
+   const struct codec_data *codec_data;
+   unsigned int i, codec_data_size;
+   u32 pixfmt;
+   int ret = 0;
+
+   if (!IS_V4(inst->core))
+   return 0;
+
+   codec_data = inst->core->res->codec_data;
+   codec_data_size = inst->core->res->codec_data_size;
+   pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
+   inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
+
+   for (i = 0; i < codec_data_size; i++) {
+   if (codec_data[i].pixfmt == pixfmt &&
+   codec_data[i].session_type == inst->session_type) {
+   inst->clk_data.codec_data = &codec_data[i];
+   break;
+   }
+   }
+
+   if (!inst->clk_data.codec_data)
+   ret = -EINVAL;
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(venus_helper_init_codec_data);
+
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  unsigned int output_bufs,
  unsigned int output2_bufs)
diff --git a/drivers/media/platform/qcom/venus/helpers.h 
b/drivers/media/platform/qcom/venus/helpers.h
index 2475f284..f9360a8 1006

[PATCH v3 0/4] media: venus: Update clock scaling and core selection

2019-06-25 Thread Aniket Masule
In this patch series, clock scaling and core selection methods are
updated. Current clock scaling and core selection methods are same
for vpu4 and previous versions. Introducing load calculations using
vpp cycles, which indicates the cycles required by video hardware to
process each macroblock. Clock scaling is now done more precisely using
vpp cycles. Instance is assigned to core with minimum load, instead
of static assignment.

Changes since v2 addressing the comments:
 - Codec_data and vpp_cycles have been renamed to codec_freq_data and
   vpp_freq respectively.
 - Conditional check in patch 4 for checking max cores available
   is updated to VIDC_CORE_ID_2.
 - venus_helper_decide_core renamed to venus_helper_set_core.
 - scale_clocks_vpu4 renamed to scale_clocks_v4.

For comment on freq field in clk_data structure, it is getting initialized
and used in scale_clocks_v4 function. 


Aniket Masule (4):
  media: venus: Add codec data table
  media: venus: Update clock scaling
  media: venus: Add interface for load per core
  media: venus: Update core selection

 drivers/media/platform/qcom/venus/core.c   |  13 ++
 drivers/media/platform/qcom/venus/core.h   |  15 ++
 drivers/media/platform/qcom/venus/helpers.c| 190 +++--
 drivers/media/platform/qcom/venus/helpers.h|   3 +-
 drivers/media/platform/qcom/venus/hfi_helper.h |   1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |   5 +
 drivers/media/platform/qcom/venus/vdec.c   |   6 +-
 drivers/media/platform/qcom/venus/venc.c   |   6 +-
 8 files changed, 226 insertions(+), 13 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v3 4/4] media: venus: Update core selection

2019-06-25 Thread Aniket Masule
Present core assignment is static. Introduce load balancing
across the cores. The load on each core is calculated and the core
with the minimum load is assigned to the given instance.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 52 +
 drivers/media/platform/qcom/venus/helpers.h |  2 +-
 drivers/media/platform/qcom/venus/vdec.c|  2 +-
 drivers/media/platform/qcom/venus/venc.c|  2 +-
 4 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index b79e83a..ef35fd8 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -498,6 +498,16 @@ static int load_scale_clocks(struct venus_inst *inst)
return scale_clocks(inst);
 }
 
+int set_core_usage(struct venus_inst *inst, u32 usage)
+{
+   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
+   struct hfi_videocores_usage_type cu;
+
+   cu.video_core_enable_mask = usage;
+
+   return hfi_session_set_property(inst, ptype, &cu);
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -801,19 +811,49 @@ int venus_helper_set_work_mode(struct venus_inst *inst, 
u32 mode)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_work_mode);
 
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
+int venus_helper_set_core(struct venus_inst *inst)
 {
-   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
-   struct hfi_videocores_usage_type cu;
+   struct venus_core *core = inst->core;
+   u32 min_core_id = 0, core0_load = 0, core1_load = 0;
+   unsigned long min_load, max_freq, cur_inst_load;
+   u32 cores_max;
+   int ret;
 
if (!IS_V4(inst->core))
return 0;
 
-   cu.video_core_enable_mask = usage;
+   core0_load = load_per_core(core, VIDC_CORE_ID_1);
+   core1_load = load_per_core(core, VIDC_CORE_ID_2);
 
-   return hfi_session_set_property(inst, ptype, );
+   min_core_id = core0_load < core1_load ? VIDC_CORE_ID_1 : VIDC_CORE_ID_2;
+   min_load = min(core0_load, core1_load);
+   cores_max = core_num_max(inst);
+
+   if (cores_max < VIDC_CORE_ID_2) {
+   min_core_id = VIDC_CORE_ID_1;
+   min_load = core0_load;
+   }
+
+   cur_inst_load = load_per_instance(inst) *
+   inst->clk_data.codec_freq_data->vpp_freq;
+   max_freq = core->res->freq_tbl[0].freq;
+
+   if ((cur_inst_load + min_load)  > max_freq) {
+   dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n",
+cur_inst_load, max_freq);
+   return -EINVAL;
+   }
+
+   ret = set_core_usage(inst, min_core_id);
+
+   if (ret)
+   return ret;
+
+   inst->clk_data.core_id = min_core_id;
+
+   return 0;
 }
-EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
+EXPORT_SYMBOL_GPL(venus_helper_set_core);
 
 int venus_helper_init_codec_freq_data(struct venus_inst *inst)
 {
diff --git a/drivers/media/platform/qcom/venus/helpers.h 
b/drivers/media/platform/qcom/venus/helpers.h
index 2c13245..1034111 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -42,7 +42,7 @@ int venus_helper_set_output_resolution(struct venus_inst 
*inst,
   u32 buftype);
 int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
 int venus_helper_init_codec_freq_data(struct venus_inst *inst);
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
+int venus_helper_set_core(struct venus_inst *inst);
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  unsigned int output_bufs,
  unsigned int output2_bufs);
diff --git a/drivers/media/platform/qcom/venus/vdec.c 
b/drivers/media/platform/qcom/venus/vdec.c
index d037f80..620e060 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -551,7 +551,7 @@ static int vdec_output_conf(struct venus_inst *inst)
if (ret)
return ret;
 
-   ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_1);
+   ret = venus_helper_set_core(inst);
if (ret)
return ret;
 
diff --git a/drivers/media/platform/qcom/venus/venc.c 
b/drivers/media/platform/qcom/venus/venc.c
index cdddc82..28e76cc 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -660,7 +660,7 @@ static int venc_set_properties(struct venus_inst *inst)
if (ret)
return ret;
 
-   ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_2);
+   ret = venus_helper_set_core(inst);
if (ret)
return ret

[PATCH v3 2/4] media: venus: Update clock scaling

2019-06-25 Thread Aniket Masule
Current clock scaling calculations are same for vpu4 and
previous versions. For vpu4, Clock scaling calculations
are updated with cycles/mb. This helps in getting precise
clock required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 111 
 drivers/media/platform/qcom/venus/helpers.h |   2 +-
 drivers/media/platform/qcom/venus/vdec.c|   2 +-
 drivers/media/platform/qcom/venus/venc.c|   2 +-
 4 files changed, 99 insertions(+), 18 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index f7f724b..e1a0247 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core, u32 
session_type)
return mbs_per_sec;
 }
 
-static int load_scale_clocks(struct venus_core *core)
+static int scale_clocks(struct venus_inst *inst)
 {
+   struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
@@ -398,6 +399,86 @@ static int load_scale_clocks(struct venus_core *core)
return ret;
 }
 
+static unsigned long calculate_vpp_freq(struct venus_inst *inst)
+{
+   unsigned long vpp_freq = 0;
+   u32 mbs_per_sec;
+
+   mbs_per_sec = load_per_instance(inst);
+   vpp_freq = mbs_per_sec * inst->clk_data.codec_freq_data->vpp_freq;
+   /* 21 / 20 is overhead factor */
+   vpp_freq += vpp_freq / 20;
+
+   return vpp_freq;
+}
+
+static int scale_clocks_v4(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
+   const struct freq_tbl *table = core->res->freq_tbl;
+   unsigned int num_rows = core->res->freq_tbl_size;
+
+   struct clk *clk = core->clks[0];
+   struct device *dev = core->dev;
+   unsigned int i;
+   unsigned long freq = 0, freq_core0 = 0, freq_core1 = 0;
+   int ret;
+
+   freq = calculate_vpp_freq(inst);
+
+   if (freq > table[0].freq)
+   goto err;
+
+   for (i = 0; i < num_rows; i++) {
+   if (freq > table[i].freq)
+   break;
+   freq = table[i].freq;
+   }
+
+   inst->clk_data.freq = freq;
+
+   mutex_lock(&core->lock);
+   list_for_each_entry(inst, &core->instances, list) {
+   if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+   freq_core0 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+   freq_core1 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+   freq_core0 += inst->clk_data.freq;
+   freq_core1 += inst->clk_data.freq;
+   }
+   }
+   mutex_unlock(&core->lock);
+
+   freq = max(freq_core0, freq_core1);
+
+   ret = clk_set_rate(clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core0_clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core1_clk, freq);
+   if (ret)
+   goto err;
+
+   return 0;
+
+err:
+   dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
+   return ret;
+}
+
+static int load_scale_clocks(struct venus_inst *inst)
+{
+   if (IS_V4(inst->core))
+   return scale_clocks_v4(inst);
+
+   return scale_clocks(inst);
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -715,35 +796,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, 
u32 usage)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
 
-int venus_helper_init_codec_data(struct venus_inst *inst)
+int venus_helper_init_codec_freq_data(struct venus_inst *inst)
 {
-   const struct codec_data *codec_data;
-   unsigned int i, codec_data_size;
+   const struct codec_freq_data *codec_freq_data;
+   unsigned int i, codec_freq_data_size;
u32 pixfmt;
int ret = 0;
 
if (!IS_V4(inst->core))
return 0;
 
-   codec_data = inst->core->res->codec_data;
-   codec_data_size = inst->core->res->codec_data_size;
+   codec_freq_data = inst->core->res->codec_freq_data;
+   codec_freq_data_size = inst->core->res->codec_freq_data_size;
pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
 
-   for (i = 0; i < codec_data_size; i++) {
-   if (codec_data[i].pixfmt == pixfmt &&
-   codec_data[i].session_type == inst->session_type) {

[PATCH 5/5] media: venus: Update core selection

2019-06-11 Thread Aniket Masule
Present core assignment is static. Introduce load balancing
across the cores. The load on each core is calculated and the core
with the minimum load is assigned to the given instance.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 50 +
 drivers/media/platform/qcom/venus/helpers.h |  2 +-
 drivers/media/platform/qcom/venus/vdec.c|  5 +--
 drivers/media/platform/qcom/venus/venc.c|  4 ++-
 4 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index edb653e..38d617b 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -497,6 +497,16 @@ static int load_scale_clocks(struct venus_inst *inst)
return scale_clocks_vpu4(inst);
 }
 
+int set_core_usage(struct venus_inst *inst, u32 usage)
+{
+   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
+   struct hfi_videocores_usage_type cu;
+
+   cu.video_core_enable_mask = usage;
+
+   return hfi_session_set_property(inst, ptype, );
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -800,19 +810,47 @@ int venus_helper_set_work_mode(struct venus_inst *inst, 
u32 mode)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_work_mode);
 
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
+int venus_helper_decide_core(struct venus_inst *inst, u32 cores_max)
 {
-   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
-   struct hfi_videocores_usage_type cu;
+   struct venus_core *core = inst->core;
+   u32 min_core_id = 0, core0_load = 0, core1_load = 0;
+   unsigned long min_load, max_freq, cur_inst_load;
+   int ret;
 
if (!IS_V4(inst->core))
return 0;
 
-   cu.video_core_enable_mask = usage;
+   core0_load = load_per_core(core, VIDC_CORE_ID_1);
+   core1_load = load_per_core(core, VIDC_CORE_ID_2);
 
-   return hfi_session_set_property(inst, ptype, );
+   min_core_id = core0_load < core1_load ? VIDC_CORE_ID_1 : VIDC_CORE_ID_2;
+   min_load = min(core0_load, core1_load);
+
+   if (cores_max < VIDC_CORE_ID_1) {
+   min_core_id = VIDC_CORE_ID_1;
+   min_load = core0_load;
+   }
+
+   cur_inst_load = load_per_instance(inst) *
+   inst->clk_data.codec_data->vpp_cycles;
+   max_freq = core->res->freq_tbl[0].freq;
+
+   if ((cur_inst_load + min_load)  > max_freq) {
+   dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n",
+cur_inst_load, max_freq);
+   return -EINVAL;
+   }
+
+   ret = set_core_usage(inst, min_core_id);
+
+   if (ret)
+   return ret;
+
+   inst->clk_data.core_id = min_core_id;
+
+   return 0;
 }
-EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
+EXPORT_SYMBOL_GPL(venus_helper_decide_core);
 
 int venus_helper_init_codec_data(struct venus_inst *inst)
 {
diff --git a/drivers/media/platform/qcom/venus/helpers.h 
b/drivers/media/platform/qcom/venus/helpers.h
index f9360a8..c41ceb3 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -42,7 +42,7 @@ int venus_helper_set_output_resolution(struct venus_inst 
*inst,
   u32 buftype);
 int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
 int venus_helper_init_codec_data(struct venus_inst *inst);
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
+int venus_helper_decide_core(struct venus_inst *inst, u32 cores_max);
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  unsigned int output_bufs,
  unsigned int output2_bufs);
diff --git a/drivers/media/platform/qcom/venus/vdec.c 
b/drivers/media/platform/qcom/venus/vdec.c
index 51795fd..9f988ba 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -544,14 +544,15 @@ static int vdec_output_conf(struct venus_inst *inst)
u32 height = inst->out_height;
u32 out_fmt, out2_fmt;
bool ubwc = false;
-   u32 ptype;
+   u32 ptype, cores_max;
int ret;
 
ret = venus_helper_set_work_mode(inst, VIDC_WORK_MODE_2);
if (ret)
return ret;
 
-   ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_1);
+   cores_max = core_num_max(inst);
+   ret = venus_helper_decide_core(inst, cores_max);
if (ret)
return ret;
 
diff --git a/drivers/media/platform/qcom/venus/venc.c 
b/drivers/media/platform/qcom/venus/venc.c
index 792cdce..ed39efd 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom

[PATCH 3/5] media: venus: Update clock scaling

2019-06-11 Thread Aniket Masule
Current clock scaling calculations are same for vpu4 and
previous versions. For vpu4, Clock scaling calculations
are updated with cycles/mb. This helps in getting precise
clock required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 88 +++--
 1 file changed, 84 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index f7f724b..7bcc1e6 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core, u32 
session_type)
return mbs_per_sec;
 }
 
-static int load_scale_clocks(struct venus_core *core)
+static int scale_clocks(struct venus_inst *inst)
 {
+   struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
@@ -398,6 +399,86 @@ static int load_scale_clocks(struct venus_core *core)
return ret;
 }
 
+static unsigned long calculate_inst_freq(struct venus_inst *inst)
+{
+   unsigned long vpp_cycles = 0;
+   u32 mbs_per_sec;
+
+   mbs_per_sec = load_per_instance(inst);
+   vpp_cycles = mbs_per_sec * inst->clk_data.codec_data->vpp_cycles;
+   /* 21 / 20 is overhead factor */
+   vpp_cycles += vpp_cycles / 20;
+
+   return vpp_cycles;
+}
+
+static int scale_clocks_vpu4(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
+   const struct freq_tbl *table = core->res->freq_tbl;
+   unsigned int num_rows = core->res->freq_tbl_size;
+
+   struct clk *clk = core->clks[0];
+   struct device *dev = core->dev;
+   unsigned int i;
+   unsigned long freq = 0, freq_core0 = 0, freq_core1 = 0;
+   int ret;
+
+   freq = calculate_inst_freq(inst);
+
+   if (freq > table[0].freq)
+   goto err;
+
+   for (i = 0; i < num_rows; i++) {
+   if (freq > table[i].freq)
+   break;
+   freq = table[i].freq;
+   }
+
+   inst->clk_data.freq = freq;
+
+   mutex_lock(>lock);
+   list_for_each_entry(inst, >instances, list) {
+   if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+   freq_core0 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+   freq_core1 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+   freq_core0 += inst->clk_data.freq;
+   freq_core1 += inst->clk_data.freq;
+   }
+   }
+   mutex_unlock(>lock);
+
+   freq = max(freq_core0, freq_core1);
+
+   ret = clk_set_rate(clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core0_clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core1_clk, freq);
+   if (ret)
+   goto err;
+
+   return 0;
+
+err:
+   dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
+   return ret;
+}
+
+static int load_scale_clocks(struct venus_inst *inst)
+{
+   if (IS_V3(inst->core) || IS_V1(inst->core))
+   return scale_clocks(inst);
+   else
+   return scale_clocks_vpu4(inst);
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -1053,7 +1134,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
 
venus_helper_free_dpb_bufs(inst);
 
-   load_scale_clocks(core);
+   load_scale_clocks(inst);
INIT_LIST_HEAD(>registeredbufs);
}
 
@@ -1070,7 +1151,6 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
 
 int venus_helper_vb2_start_streaming(struct venus_inst *inst)
 {
-   struct venus_core *core = inst->core;
int ret;
 
ret = intbufs_alloc(inst);
@@ -1081,7 +1161,7 @@ int venus_helper_vb2_start_streaming(struct venus_inst 
*inst)
if (ret)
goto err_bufs_free;
 
-   load_scale_clocks(core);
+   load_scale_clocks(inst);
 
ret = hfi_session_load_res(inst);
if (ret)
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 4/5] media: venus: Add interface for load per core

2019-06-11 Thread Aniket Masule
Add an interface to calculate load per core. Also,
add an interface to get maximum cores available with
video. This interface is preparation for updating core
selection.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c| 18 ++
 drivers/media/platform/qcom/venus/hfi_helper.h |  1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |  5 +
 3 files changed, 24 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 7bcc1e6..edb653e 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -331,6 +331,24 @@ static u32 load_per_instance(struct venus_inst *inst)
return mbs * inst->fps;
 }
 
+static u32 load_per_core(struct venus_core *core, u32 core_id)
+{
+   struct venus_inst *inst = NULL;
+   u32 mbs_per_sec = 0, load = 0;
+
+   mutex_lock(&core->lock);
+   list_for_each_entry(inst, &core->instances, list) {
+   if (!(inst->clk_data.core_id == core_id))
+   continue;
+
+   mbs_per_sec += load_per_instance(inst);
+   load += mbs_per_sec * inst->clk_data.codec_data->vpp_cycles;
+   }
+   mutex_unlock(&core->lock);
+
+   return load;
+}
+
 static u32 load_per_type(struct venus_core *core, u32 session_type)
 {
struct venus_inst *inst = NULL;
diff --git a/drivers/media/platform/qcom/venus/hfi_helper.h 
b/drivers/media/platform/qcom/venus/hfi_helper.h
index 34ea503..3677e2e 100644
--- a/drivers/media/platform/qcom/venus/hfi_helper.h
+++ b/drivers/media/platform/qcom/venus/hfi_helper.h
@@ -559,6 +559,7 @@ struct hfi_bitrate {
 #define HFI_CAPABILITY_LCU_SIZE0x14
 #define HFI_CAPABILITY_HIER_P_HYBRID_NUM_ENH_LAYERS0x15
 #define HFI_CAPABILITY_MBS_PER_SECOND_POWERSAVE0x16
+#define HFI_CAPABILITY_MAX_VIDEOCORES  0x2B
 
 struct hfi_capability {
u32 capability_type;
diff --git a/drivers/media/platform/qcom/venus/hfi_parser.h 
b/drivers/media/platform/qcom/venus/hfi_parser.h
index 3e931c7..264e6dd 100644
--- a/drivers/media/platform/qcom/venus/hfi_parser.h
+++ b/drivers/media/platform/qcom/venus/hfi_parser.h
@@ -107,4 +107,9 @@ static inline u32 frate_step(struct venus_inst *inst)
return cap_step(inst, HFI_CAPABILITY_FRAMERATE);
 }
 
+static inline u32 core_num_max(struct venus_inst *inst)
+{
+   return cap_max(inst, HFI_CAPABILITY_MAX_VIDEOCORES);
+}
+
 #endif
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 2/5] media: venus: Initialize codec data

2019-06-11 Thread Aniket Masule
Initialize the codec data with core resources.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 30 +
 drivers/media/platform/qcom/venus/helpers.h |  1 +
 drivers/media/platform/qcom/venus/vdec.c|  4 
 drivers/media/platform/qcom/venus/venc.c|  4 
 4 files changed, 39 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 5cad601..f7f724b 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -715,6 +715,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, 
u32 usage)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
 
+int venus_helper_init_codec_data(struct venus_inst *inst)
+{
+   const struct codec_data *codec_data;
+   unsigned int i, codec_data_size;
+   u32 pixfmt;
+   int ret = 0;
+
+   if (!IS_V4(inst->core))
+   return 0;
+
+   codec_data = inst->core->res->codec_data;
+   codec_data_size = inst->core->res->codec_data_size;
+   pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
+   inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
+
+   for (i = 0; i < codec_data_size; i++) {
+   if (codec_data[i].pixfmt == pixfmt &&
+   codec_data[i].session_type == inst->session_type) {
+   inst->clk_data.codec_data = &codec_data[i];
+   break;
+   }
+   }
+
+   if (!inst->clk_data.codec_data)
+   ret = -EINVAL;
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(venus_helper_init_codec_data);
+
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  unsigned int output_bufs,
  unsigned int output2_bufs)
diff --git a/drivers/media/platform/qcom/venus/helpers.h 
b/drivers/media/platform/qcom/venus/helpers.h
index 2475f284..f9360a8 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -41,6 +41,7 @@ int venus_helper_set_output_resolution(struct venus_inst 
*inst,
   unsigned int width, unsigned int height,
   u32 buftype);
 int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
+int venus_helper_init_codec_data(struct venus_inst *inst);
 int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  unsigned int output_bufs,
diff --git a/drivers/media/platform/qcom/venus/vdec.c 
b/drivers/media/platform/qcom/venus/vdec.c
index 282de21..51795fd 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -660,6 +660,10 @@ static int vdec_init_session(struct venus_inst *inst)
if (ret)
goto deinit;
 
+   ret = venus_helper_init_codec_data(inst);
+   if (ret)
+   goto deinit;
+
return 0;
 deinit:
hfi_session_deinit(inst);
diff --git a/drivers/media/platform/qcom/venus/venc.c 
b/drivers/media/platform/qcom/venus/venc.c
index 32cff29..792cdce 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -847,6 +847,10 @@ static int venc_init_session(struct venus_inst *inst)
if (ret)
goto deinit;
 
+   ret = venus_helper_init_codec_data(inst);
+   if (ret)
+   goto deinit;
+
ret = venc_set_properties(inst);
if (ret)
goto deinit;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 0/5] media: venus: Update clock scaling and core selection

2019-06-11 Thread Aniket Masule
In this patch series, clock scaling and core selection methods are
updated. Current clock scaling and core selection methods are same
for vpu4 and previous versions. Introducing load calculations using
vpp cycles, which indicates the cycles required by video hardware to
process each macroblock. Clock scaling is now done more precisely using
vpp cycles. Instance is assigned to core with minimum load, instead of
static assignment.

Changes since v1:
 - Corrected VPP cycles entries in codec data table.
 - Removed filled_len from arguments to calculate_inst_freq, 
   filled_len won't be used in this patch series.
   filled_len will be used in clock scaling based on bitrate.

Aniket Masule (5):
  media: venus: Add codec data table
  media: venus: Initialize codec data
  media: venus: Update clock scaling
  media: venus: Add interface for load per core
  media: venus: Update core selection

 drivers/media/platform/qcom/venus/core.c   |  13 ++
 drivers/media/platform/qcom/venus/core.h   |  15 ++
 drivers/media/platform/qcom/venus/helpers.c| 186 +++--
 drivers/media/platform/qcom/venus/helpers.h|   3 +-
 drivers/media/platform/qcom/venus/hfi_helper.h |   1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |   5 +
 drivers/media/platform/qcom/venus/vdec.c   |   9 +-
 drivers/media/platform/qcom/venus/venc.c   |   8 +-
 8 files changed, 226 insertions(+), 14 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 1/5] media: venus: Add codec data table

2019-06-11 Thread Aniket Masule
Add vpp cycles for different types of codec.
It indicates the cycles required by video hardware
to process each macroblock.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/core.c | 13 +
 drivers/media/platform/qcom/venus/core.h | 15 +++
 2 files changed, 28 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index 7393667..43eb446 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -473,9 +473,22 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
{  244800, 1 }, /* 1920x1080@30 */
 };
 
+static struct codec_data sdm845_codec_data[] =  {
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675 },
+   { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200 },
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200 },
+   { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200 },
+};
+
 static const struct venus_resources sdm845_res = {
.freq_tbl = sdm845_freq_table,
.freq_tbl_size = ARRAY_SIZE(sdm845_freq_table),
+   .codec_data = sdm845_codec_data,
+   .codec_data_size = ARRAY_SIZE(sdm845_codec_data),
.clks = {"core", "iface", "bus" },
.clks_num = 3,
.max_load = 2563200,
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 7a3feb5..b1a9b43 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -35,12 +35,20 @@ struct reg_val {
u32 value;
 };
 
+struct codec_data {
+u32 pixfmt;
+u32 session_type;
+int vpp_cycles;
+};
+
 struct venus_resources {
u64 dma_mask;
const struct freq_tbl *freq_tbl;
unsigned int freq_tbl_size;
const struct reg_val *reg_tbl;
unsigned int reg_tbl_size;
+   const struct codec_data *codec_data;
+   unsigned int codec_data_size;
const char * const clks[VIDC_CLKS_NUM_MAX];
unsigned int clks_num;
enum hfi_version hfi_version;
@@ -216,6 +224,12 @@ struct venus_buffer {
struct list_head ref_list;
 };
 
+struct clock_data {
+   u32 core_id;
+   unsigned long freq;
+   struct codec_data *codec_data;
+};
+
 #define to_venus_buffer(ptr)   container_of(ptr, struct venus_buffer, vb)
 
 /**
@@ -275,6 +289,7 @@ struct venus_inst {
struct list_head list;
struct mutex lock;
struct venus_core *core;
+   struct clock_data clk_data;
struct list_head dpbbufs;
struct list_head internalbufs;
struct list_head registeredbufs;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 5/5] media: venus: Update core selection

2019-05-30 Thread Aniket Masule
Present core assignment is static. Introduced load balancing
across the cores. Load on each core is calculated and core
with minimum load is assigned to given instance.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 50 +
 drivers/media/platform/qcom/venus/helpers.h |  2 +-
 drivers/media/platform/qcom/venus/vdec.c|  5 +--
 drivers/media/platform/qcom/venus/venc.c|  4 ++-
 4 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 692ba3b..6ce202f 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -500,6 +500,16 @@ static int load_scale_clocks(struct venus_inst *inst)
return scale_clocks_vpu4(inst);
 }
 
+int set_core_usage(struct venus_inst *inst, u32 usage)
+{
+   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
+   struct hfi_videocores_usage_type cu;
+
+   cu.video_core_enable_mask = usage;
+
+   return hfi_session_set_property(inst, ptype, &cu);
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -803,19 +813,47 @@ int venus_helper_set_work_mode(struct venus_inst *inst, 
u32 mode)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_work_mode);
 
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage)
+int venus_helper_decide_core(struct venus_inst *inst, u32 cores_max)
 {
-   const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
-   struct hfi_videocores_usage_type cu;
+   struct venus_core *core = inst->core;
+   u32 min_core_id = 0, core0_load = 0, core1_load = 0;
+   unsigned long min_load, max_freq, cur_inst_load;
+   int ret;
 
if (!IS_V4(inst->core))
return 0;
 
-   cu.video_core_enable_mask = usage;
+   core0_load = load_per_core(core, VIDC_CORE_ID_1);
+   core1_load = load_per_core(core, VIDC_CORE_ID_2);
 
-   return hfi_session_set_property(inst, ptype, &cu);
+   min_core_id = core0_load < core1_load ? VIDC_CORE_ID_1 : VIDC_CORE_ID_2;
+   min_load = min(core0_load, core1_load);
+
+   if (cores_max < VIDC_CORE_ID_1) {
+   min_core_id = VIDC_CORE_ID_1;
+   min_load = core0_load;
+   }
+
+   cur_inst_load = load_per_instance(inst) *
+   inst->clk_data.codec_data->vpp_cycles;
+   max_freq = core->res->freq_tbl[0].freq;
+
+   if ((cur_inst_load + min_load)  > max_freq) {
+   dev_warn(core->dev, "HW is overloaded, needed: %lu max: %lu\n",
+cur_inst_load, max_freq);
+   return -EINVAL;
+   }
+
+   ret = set_core_usage(inst, min_core_id);
+
+   if (ret)
+   return ret;
+
+   inst->clk_data.core_id = min_core_id;
+
+   return 0;
 }
-EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
+EXPORT_SYMBOL_GPL(venus_helper_decide_core);
 
 int venus_helper_init_codec_data(struct venus_inst *inst)
 {
diff --git a/drivers/media/platform/qcom/venus/helpers.h 
b/drivers/media/platform/qcom/venus/helpers.h
index f9360a8..c41ceb3 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -42,7 +42,7 @@ int venus_helper_set_output_resolution(struct venus_inst 
*inst,
   u32 buftype);
 int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
 int venus_helper_init_codec_data(struct venus_inst *inst);
-int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
+int venus_helper_decide_core(struct venus_inst *inst, u32 cores_max);
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  unsigned int output_bufs,
  unsigned int output2_bufs);
diff --git a/drivers/media/platform/qcom/venus/vdec.c 
b/drivers/media/platform/qcom/venus/vdec.c
index 51795fd..9f988ba 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -544,14 +544,15 @@ static int vdec_output_conf(struct venus_inst *inst)
u32 height = inst->out_height;
u32 out_fmt, out2_fmt;
bool ubwc = false;
-   u32 ptype;
+   u32 ptype, cores_max;
int ret;
 
ret = venus_helper_set_work_mode(inst, VIDC_WORK_MODE_2);
if (ret)
return ret;
 
-   ret = venus_helper_set_core_usage(inst, VIDC_CORE_ID_1);
+   cores_max = core_num_max(inst);
+   ret = venus_helper_decide_core(inst, cores_max);
if (ret)
return ret;
 
diff --git a/drivers/media/platform/qcom/venus/venc.c 
b/drivers/media/platform/qcom/venus/venc.c
index 792cdce..ed39efd 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom

[PATCH 4/5] media: venus: Add interface for load per core

2019-05-30 Thread Aniket Masule
Add an interface to calculate load per core. Also,
add an interface to get maximum cores available with
video. This interface is preparation for updating core
selection.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c| 18 ++
 drivers/media/platform/qcom/venus/hfi_helper.h |  1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |  5 +
 3 files changed, 24 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index f2470af..692ba3b 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -331,6 +331,24 @@ static u32 load_per_instance(struct venus_inst *inst)
return mbs * inst->fps;
 }
 
+static u32 load_per_core(struct venus_core *core, u32 core_id)
+{
+   struct venus_inst *inst = NULL;
+   u32 mbs_per_sec = 0, load = 0;
+
+   mutex_lock(&core->lock);
+   list_for_each_entry(inst, &core->instances, list) {
+   if (!(inst->clk_data.core_id == core_id))
+   continue;
+
+   mbs_per_sec += load_per_instance(inst);
+   load += mbs_per_sec * inst->clk_data.codec_data->vpp_cycles;
+   }
+   mutex_unlock(&core->lock);
+
+   return load;
+}
+
 static u32 load_per_type(struct venus_core *core, u32 session_type)
 {
struct venus_inst *inst = NULL;
diff --git a/drivers/media/platform/qcom/venus/hfi_helper.h 
b/drivers/media/platform/qcom/venus/hfi_helper.h
index 34ea503..3677e2e 100644
--- a/drivers/media/platform/qcom/venus/hfi_helper.h
+++ b/drivers/media/platform/qcom/venus/hfi_helper.h
@@ -559,6 +559,7 @@ struct hfi_bitrate {
 #define HFI_CAPABILITY_LCU_SIZE0x14
 #define HFI_CAPABILITY_HIER_P_HYBRID_NUM_ENH_LAYERS0x15
 #define HFI_CAPABILITY_MBS_PER_SECOND_POWERSAVE0x16
+#define HFI_CAPABILITY_MAX_VIDEOCORES  0x2B
 
 struct hfi_capability {
u32 capability_type;
diff --git a/drivers/media/platform/qcom/venus/hfi_parser.h 
b/drivers/media/platform/qcom/venus/hfi_parser.h
index 3e931c7..264e6dd 100644
--- a/drivers/media/platform/qcom/venus/hfi_parser.h
+++ b/drivers/media/platform/qcom/venus/hfi_parser.h
@@ -107,4 +107,9 @@ static inline u32 frate_step(struct venus_inst *inst)
return cap_step(inst, HFI_CAPABILITY_FRAMERATE);
 }
 
+static inline u32 core_num_max(struct venus_inst *inst)
+{
+   return cap_max(inst, HFI_CAPABILITY_MAX_VIDEOCORES);
+}
+
 #endif
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 3/5] media: venus: Update clock scaling

2019-05-30 Thread Aniket Masule
Current clock scaling calculations are same for vpu4 and
previous versions. For vpu4, Clock scaling calculations
are updated with cycles/mb. This helps in getting precise
clock required.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 91 +++--
 1 file changed, 87 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index f7f724b..f2470af 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -348,8 +348,9 @@ static u32 load_per_type(struct venus_core *core, u32 
session_type)
return mbs_per_sec;
 }
 
-static int load_scale_clocks(struct venus_core *core)
+static int scale_clocks(struct venus_inst *inst)
 {
+   struct venus_core *core = inst->core;
const struct freq_tbl *table = core->res->freq_tbl;
unsigned int num_rows = core->res->freq_tbl_size;
unsigned long freq = table[0].freq;
@@ -398,6 +399,89 @@ static int load_scale_clocks(struct venus_core *core)
return ret;
 }
 
+static unsigned long calculate_inst_freq(struct venus_inst *inst,
+u32 filled_len)
+{
+   unsigned long vpp_cycles = 0;
+   u32 mbs_per_sec;
+
+   mbs_per_sec = load_per_instance(inst);
+   vpp_cycles = mbs_per_sec * inst->clk_data.codec_data->vpp_cycles;
+   /* 21 / 20 is overhead factor */
+   vpp_cycles += vpp_cycles / 20;
+
+   return vpp_cycles;
+}
+
+static int scale_clocks_vpu4(struct venus_inst *inst)
+{
+   struct venus_core *core = inst->core;
+   const struct freq_tbl *table = core->res->freq_tbl;
+   unsigned int num_rows = core->res->freq_tbl_size;
+
+   struct clk *clk = core->clks[0];
+   struct device *dev = core->dev;
+
+   unsigned int i;
+   u32 filled_len = 0;
+   unsigned long freq = 0, freq_core0 = 0, freq_core1 = 0;
+   int ret;
+
+   freq = calculate_inst_freq(inst, filled_len);
+
+   if (freq > table[0].freq)
+   goto err;
+
+   for (i = 0; i < num_rows; i++) {
+   if (freq > table[i].freq)
+   break;
+   freq = table[i].freq;
+   }
+
+   inst->clk_data.freq = freq;
+
+   mutex_lock(&core->lock);
+   list_for_each_entry(inst, &core->instances, list) {
+   if (inst->clk_data.core_id == VIDC_CORE_ID_1) {
+   freq_core0 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_2) {
+   freq_core1 += inst->clk_data.freq;
+   } else if (inst->clk_data.core_id == VIDC_CORE_ID_3) {
+   freq_core0 += inst->clk_data.freq;
+   freq_core1 += inst->clk_data.freq;
+   }
+   }
+   mutex_unlock(&core->lock);
+
+   freq = max(freq_core0, freq_core1);
+
+   ret = clk_set_rate(clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core0_clk, freq);
+   if (ret)
+   goto err;
+
+   ret = clk_set_rate(core->core1_clk, freq);
+   if (ret)
+   goto err;
+
+   return 0;
+
+err:
+   dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret);
+   return ret;
+}
+
+static int load_scale_clocks(struct venus_inst *inst)
+{
+   if (IS_V3(inst->core) || IS_V1(inst->core))
+   return scale_clocks(inst);
+   else
+   return scale_clocks_vpu4(inst);
+}
+
 static void fill_buffer_desc(const struct venus_buffer *buf,
 struct hfi_buffer_desc *bd, bool response)
 {
@@ -1053,7 +1137,7 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
 
venus_helper_free_dpb_bufs(inst);
 
-   load_scale_clocks(core);
+   load_scale_clocks(inst);
INIT_LIST_HEAD(&inst->registeredbufs);
}
 
@@ -1070,7 +1154,6 @@ void venus_helper_vb2_stop_streaming(struct vb2_queue *q)
 
 int venus_helper_vb2_start_streaming(struct venus_inst *inst)
 {
-   struct venus_core *core = inst->core;
int ret;
 
ret = intbufs_alloc(inst);
@@ -1081,7 +1164,7 @@ int venus_helper_vb2_start_streaming(struct venus_inst 
*inst)
if (ret)
goto err_bufs_free;
 
-   load_scale_clocks(core);
+   load_scale_clocks(inst);
 
ret = hfi_session_load_res(inst);
if (ret)
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 1/5] media: venus: Add codec data table

2019-05-30 Thread Aniket Masule
Add vpp cycles for different types of codec.
It indicates the cycles required by video hardware
to process each macroblock.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/core.c | 13 +
 drivers/media/platform/qcom/venus/core.h | 15 +++
 2 files changed, 28 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/core.c 
b/drivers/media/platform/qcom/venus/core.c
index 7393667..e7ebea1 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -473,9 +473,22 @@ static __maybe_unused int venus_runtime_resume(struct 
device *dev)
{  244800, 1 }, /* 1920x1080@30 */
 };
 
+static struct codec_data sdm845_codec_data[] =  {
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 125 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 125 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 125 },
+   { V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 50 },
+   { V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 50 },
+   { V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 50 },
+   { V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 50 },
+   { V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 50 },
+};
+
 static const struct venus_resources sdm845_res = {
.freq_tbl = sdm845_freq_table,
.freq_tbl_size = ARRAY_SIZE(sdm845_freq_table),
+   .codec_data = sdm845_codec_data,
+   .codec_data_size = ARRAY_SIZE(sdm845_codec_data),
.clks = {"core", "iface", "bus" },
.clks_num = 3,
.max_load = 2563200,
diff --git a/drivers/media/platform/qcom/venus/core.h 
b/drivers/media/platform/qcom/venus/core.h
index 7a3feb5..b1a9b43 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -35,12 +35,20 @@ struct reg_val {
u32 value;
 };
 
+struct codec_data {
+u32 pixfmt;
+u32 session_type;
+int vpp_cycles;
+};
+
 struct venus_resources {
u64 dma_mask;
const struct freq_tbl *freq_tbl;
unsigned int freq_tbl_size;
const struct reg_val *reg_tbl;
unsigned int reg_tbl_size;
+   const struct codec_data *codec_data;
+   unsigned int codec_data_size;
const char * const clks[VIDC_CLKS_NUM_MAX];
unsigned int clks_num;
enum hfi_version hfi_version;
@@ -216,6 +224,12 @@ struct venus_buffer {
struct list_head ref_list;
 };
 
+struct clock_data {
+   u32 core_id;
+   unsigned long freq;
+   struct codec_data *codec_data;
+};
+
 #define to_venus_buffer(ptr)   container_of(ptr, struct venus_buffer, vb)
 
 /**
@@ -275,6 +289,7 @@ struct venus_inst {
struct list_head list;
struct mutex lock;
struct venus_core *core;
+   struct clock_data clk_data;
struct list_head dpbbufs;
struct list_head internalbufs;
struct list_head registeredbufs;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 2/5] media: venus: Initialize codec data

2019-05-30 Thread Aniket Masule
Initialize the codec data with core resources.

Signed-off-by: Aniket Masule 
---
 drivers/media/platform/qcom/venus/helpers.c | 30 +
 drivers/media/platform/qcom/venus/helpers.h |  1 +
 drivers/media/platform/qcom/venus/vdec.c|  4 
 drivers/media/platform/qcom/venus/venc.c|  4 
 4 files changed, 39 insertions(+)

diff --git a/drivers/media/platform/qcom/venus/helpers.c 
b/drivers/media/platform/qcom/venus/helpers.c
index 5cad601..f7f724b 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -715,6 +715,36 @@ int venus_helper_set_core_usage(struct venus_inst *inst, 
u32 usage)
 }
 EXPORT_SYMBOL_GPL(venus_helper_set_core_usage);
 
+int venus_helper_init_codec_data(struct venus_inst *inst)
+{
+   const struct codec_data *codec_data;
+   unsigned int i, codec_data_size;
+   u32 pixfmt;
+   int ret = 0;
+
+   if (!IS_V4(inst->core))
+   return 0;
+
+   codec_data = inst->core->res->codec_data;
+   codec_data_size = inst->core->res->codec_data_size;
+   pixfmt = inst->session_type == VIDC_SESSION_TYPE_DEC ?
+   inst->fmt_out->pixfmt : inst->fmt_cap->pixfmt;
+
+   for (i = 0; i < codec_data_size; i++) {
+   if (codec_data[i].pixfmt == pixfmt &&
+   codec_data[i].session_type == inst->session_type) {
+   inst->clk_data.codec_data = &codec_data[i];
+   break;
+   }
+   }
+
+   if (!inst->clk_data.codec_data)
+   ret = -EINVAL;
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(venus_helper_init_codec_data);
+
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  unsigned int output_bufs,
  unsigned int output2_bufs)
diff --git a/drivers/media/platform/qcom/venus/helpers.h 
b/drivers/media/platform/qcom/venus/helpers.h
index 2475f284..f9360a8 100644
--- a/drivers/media/platform/qcom/venus/helpers.h
+++ b/drivers/media/platform/qcom/venus/helpers.h
@@ -41,6 +41,7 @@ int venus_helper_set_output_resolution(struct venus_inst 
*inst,
   unsigned int width, unsigned int height,
   u32 buftype);
 int venus_helper_set_work_mode(struct venus_inst *inst, u32 mode);
+int venus_helper_init_codec_data(struct venus_inst *inst);
 int venus_helper_set_core_usage(struct venus_inst *inst, u32 usage);
 int venus_helper_set_num_bufs(struct venus_inst *inst, unsigned int input_bufs,
  unsigned int output_bufs,
diff --git a/drivers/media/platform/qcom/venus/vdec.c 
b/drivers/media/platform/qcom/venus/vdec.c
index 282de21..51795fd 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -660,6 +660,10 @@ static int vdec_init_session(struct venus_inst *inst)
if (ret)
goto deinit;
 
+   ret = venus_helper_init_codec_data(inst);
+   if (ret)
+   goto deinit;
+
return 0;
 deinit:
hfi_session_deinit(inst);
diff --git a/drivers/media/platform/qcom/venus/venc.c 
b/drivers/media/platform/qcom/venus/venc.c
index 32cff29..792cdce 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -847,6 +847,10 @@ static int venc_init_session(struct venus_inst *inst)
if (ret)
goto deinit;
 
+   ret = venus_helper_init_codec_data(inst);
+   if (ret)
+   goto deinit;
+
ret = venc_set_properties(inst);
if (ret)
goto deinit;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 0/5] media: venus: Update clock scaling and core selection

2019-05-30 Thread Aniket Masule
In this patch series, clock scaling and core selection methods are
updated. Current clock scaling and core selection methods are same
for vpu4 and previous versions. Introducing load calculations using
vpp cycles, which indicates the cycles required by video hardware to
process each macroblock. Clock scaling is now done more precisely using
vpp cycles. Instance is assigned to core with minimum load, instead of
static assignment.

Aniket Masule (5):
  media: venus: Add codec data table
  media: venus: Initialize codec data
  media: venus: Update clock scaling
  media: venus: Add interface for load per core
  media: venus: Update core selection

 drivers/media/platform/qcom/venus/core.c   |  13 ++
 drivers/media/platform/qcom/venus/core.h   |  15 ++
 drivers/media/platform/qcom/venus/helpers.c| 189 +++--
 drivers/media/platform/qcom/venus/helpers.h|   3 +-
 drivers/media/platform/qcom/venus/hfi_helper.h |   1 +
 drivers/media/platform/qcom/venus/hfi_parser.h |   5 +
 drivers/media/platform/qcom/venus/vdec.c   |   9 +-
 drivers/media/platform/qcom/venus/venc.c   |   8 +-
 8 files changed, 229 insertions(+), 14 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project