Re: [x265] [PATCH 3 of 4] limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth

2016-11-17 Thread Bhavna Hariharan
There is a mistake in the first patch; please apply the latest patch instead.


Regards,

Bhavna Hariharan

On Fri, Nov 18, 2016 at 9:49 AM, Bhavna Hariharan <
bha...@multicorewareinc.com> wrote:

> # HG changeset patch
> # User Bhavna Hariharan 
> # Date 1479380469 -19800
> #  Thu Nov 17 16:31:09 2016 +0530
> # Node ID d4425aa677c63b8486dde5bb716f9a191da6bdaa
> # Parent  07a4e4d785a69f719922129ca5997b12552bb4ab
> limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth
>
> diff -r 07a4e4d785a6 -r d4425aa677c6 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Thu Nov 17 12:19:38 2016 +0530
> +++ b/source/encoder/analysis.cpp Thu Nov 17 16:31:09 2016 +0530
> @@ -377,7 +377,7 @@
>  checkBestMode(md.pred[PRED_INTRA_NxN], depth);
>  }
>
> -if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
> +if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
>  {
>  CUData* ctu = md.bestMode->cu.m_encData->
> getPicCTU(parentCTU.m_cuAddr);
>  int8_t maxTUDepth = -1;
> @@ -954,7 +954,7 @@
>  bool skipRectAmp = false;
>  bool chooseMerge = false;
>
> -if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
> +if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
>  m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
>
>  SplitData splitData[4];
> @@ -1463,7 +1463,7 @@
>  if (m_param->rdLevel)
>  md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr,
> cuGeom.absPartIdx);
>
> -if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
> +if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
>  {
>  if (mightNotSplit)
>  {
> @@ -1498,7 +1498,7 @@
>  md.pred[PRED_2Nx2N].rdCost = 0;
>  }
>
> -if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
> +if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
>  m_maxTUDepth =  loadTUDepth(cuGeom, parentCTU);
>
>  SplitData splitData[4];
> @@ -1827,7 +1827,7 @@
>  if (mightSplit)
>  addSplitFlagCost(*md.bestMode, cuGeom.depth);
>  }
> -if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
> +if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
>  {
>  if (mightNotSplit)
>  {
> diff -r 07a4e4d785a6 -r d4425aa677c6 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Thu Nov 17 12:19:38 2016 +0530
> +++ b/source/encoder/search.cpp Thu Nov 17 16:31:09 2016 +0530
> @@ -2625,7 +2625,7 @@
>
>  uint32_t tuDepthRange[2];
>  cu.getInterTUQtDepthRange(tuDepthRange, 0);
> -if (limitTU == X265_TU_LIMIT_NEIGH)
> +if (limitTU & X265_TU_LIMIT_NEIGH)
>  {
>  int maxLog2CUSize = (int)g_log2Size[m_param->maxCUSize];
>  m_maxTUDepth = x265_clip3(maxLog2CUSize -
> (int32_t)tuDepthRange[1], maxLog2CUSize - (int32_t)tuDepthRange[0],
> m_maxTUDepth);
> @@ -2639,7 +2639,14 @@
>  memset(&m_cacheTU, 0, sizeof(TUInfoCache));
>
>  Cost costs;
> -estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
> tuDepthRange);
> +if (limitTU == X265_TU_LIMIT_DFS_NEIGH)
> +{
> +int32_t tempDepth = m_maxTUDepth;
> +estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
> tuDepthRange);
> +m_maxTUDepth = tempDepth;
> +}
> +else
> +estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
> tuDepthRange);
>
>  uint32_t tqBypass = cu.m_tqBypass[0];
>  if (!tqBypass)
> @@ -2898,10 +2905,11 @@
>  uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
>  for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx,
> qPartIdx += qNumParts)
>  {
> -if (limitTU == X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
> +if (limitTU & X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
>  {
> +m_maxTUDepth = cu.m_tuDepth[0];
>  // Fetch maximum TU depth of first sub partition to limit
> recursion of others
> -for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++)
> +for (uint32_t i = 1; i < cuGeom.numPartitions / 4; i++)
>  m_maxTUDepth = X265_MAX(m_maxTUDepth, cu.m_tuDepth[i]);
>  }
>  estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv,
> splitCost, depthRange, splitMore);
> @@ -2968,7 +2976,7 @@
>  }
>  }
>  }
> -else if (limitTU == X265_TU_LIMIT_DFS || limitTU ==
> X265_TU_LIMIT_NEIGH)
> +else if (limitTU & X265_TU_LIMIT_DFS_NEIGH)
>  {
>  if (bCheckSplit && m_maxTUDepth >= 0)
>  {
>
>
>
> Regards,
>
> Bhavna Hariharan
>
> On Thu, Nov 17, 2016 at 5:06 PM,  wrote:
>
>> # HG changeset patch
>> # User Bhavna Hariharan 
>> # Date 1479380469 -19800
>> #  Thu Nov 17 16:31:09 2016 +0530
>> # Node ID f5d3a5aedcdf20235ec76dbf9aba516da83e8dd1
>> # Parent  07a4e4d785a69f719922129ca5997b12552bb4ab
>> limitTU : use neighbouring CUs' TU depth to lim

Re: [x265] [PATCH 3 of 4] limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth

2016-11-17 Thread Bhavna Hariharan
# HG changeset patch
# User Bhavna Hariharan 
# Date 1479380469 -19800
#  Thu Nov 17 16:31:09 2016 +0530
# Node ID d4425aa677c63b8486dde5bb716f9a191da6bdaa
# Parent  07a4e4d785a69f719922129ca5997b12552bb4ab
limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth

diff -r 07a4e4d785a6 -r d4425aa677c6 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Thu Nov 17 12:19:38 2016 +0530
+++ b/source/encoder/analysis.cpp Thu Nov 17 16:31:09 2016 +0530
@@ -377,7 +377,7 @@
 checkBestMode(md.pred[PRED_INTRA_NxN], depth);
 }

-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 {
 CUData* ctu =
md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
 int8_t maxTUDepth = -1;
@@ -954,7 +954,7 @@
 bool skipRectAmp = false;
 bool chooseMerge = false;

-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);

 SplitData splitData[4];
@@ -1463,7 +1463,7 @@
 if (m_param->rdLevel)
 md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr,
cuGeom.absPartIdx);

-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 {
 if (mightNotSplit)
 {
@@ -1498,7 +1498,7 @@
 md.pred[PRED_2Nx2N].rdCost = 0;
 }

-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 m_maxTUDepth =  loadTUDepth(cuGeom, parentCTU);

 SplitData splitData[4];
@@ -1827,7 +1827,7 @@
 if (mightSplit)
 addSplitFlagCost(*md.bestMode, cuGeom.depth);
 }
-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 {
 if (mightNotSplit)
 {
diff -r 07a4e4d785a6 -r d4425aa677c6 source/encoder/search.cpp
--- a/source/encoder/search.cpp Thu Nov 17 12:19:38 2016 +0530
+++ b/source/encoder/search.cpp Thu Nov 17 16:31:09 2016 +0530
@@ -2625,7 +2625,7 @@

 uint32_t tuDepthRange[2];
 cu.getInterTUQtDepthRange(tuDepthRange, 0);
-if (limitTU == X265_TU_LIMIT_NEIGH)
+if (limitTU & X265_TU_LIMIT_NEIGH)
 {
 int maxLog2CUSize = (int)g_log2Size[m_param->maxCUSize];
 m_maxTUDepth = x265_clip3(maxLog2CUSize -
(int32_t)tuDepthRange[1], maxLog2CUSize - (int32_t)tuDepthRange[0],
m_maxTUDepth);
@@ -2639,7 +2639,14 @@
 memset(&m_cacheTU, 0, sizeof(TUInfoCache));

 Cost costs;
-estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
tuDepthRange);
+if (limitTU == X265_TU_LIMIT_DFS_NEIGH)
+{
+int32_t tempDepth = m_maxTUDepth;
+estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
tuDepthRange);
+m_maxTUDepth = tempDepth;
+}
+else
+estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
tuDepthRange);

 uint32_t tqBypass = cu.m_tqBypass[0];
 if (!tqBypass)
@@ -2898,10 +2905,11 @@
 uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
 for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx,
qPartIdx += qNumParts)
 {
-if (limitTU == X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
+if (limitTU & X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
 {
+m_maxTUDepth = cu.m_tuDepth[0];
 // Fetch maximum TU depth of first sub partition to limit
recursion of others
-for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++)
+for (uint32_t i = 1; i < cuGeom.numPartitions / 4; i++)
 m_maxTUDepth = X265_MAX(m_maxTUDepth, cu.m_tuDepth[i]);
 }
 estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv,
splitCost, depthRange, splitMore);
@@ -2968,7 +2976,7 @@
 }
 }
 }
-else if (limitTU == X265_TU_LIMIT_DFS || limitTU ==
X265_TU_LIMIT_NEIGH)
+else if (limitTU & X265_TU_LIMIT_DFS_NEIGH)
 {
 if (bCheckSplit && m_maxTUDepth >= 0)
 {



Regards,

Bhavna Hariharan

On Thu, Nov 17, 2016 at 5:06 PM,  wrote:

> # HG changeset patch
> # User Bhavna Hariharan 
> # Date 1479380469 -19800
> #  Thu Nov 17 16:31:09 2016 +0530
> # Node ID f5d3a5aedcdf20235ec76dbf9aba516da83e8dd1
> # Parent  07a4e4d785a69f719922129ca5997b12552bb4ab
> limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth
>
> diff -r 07a4e4d785a6 -r f5d3a5aedcdf source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp   Thu Nov 17 12:19:38 2016 +0530
> +++ b/source/encoder/analysis.cpp   Thu Nov 17 16:31:09 2016 +0530
> @@ -377,7 +377,7 @@
>  checkBestMode(md.pred[PRED_INTRA_NxN], depth);
>  }
>
> -if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
> +if ((limitTU & X

[x265] [PATCH 1 of 9] pcs: update design to have 'm_achievedFps' for every PCS Instance

2016-11-17 Thread praveen
# HG changeset patch
# User Praveen Tiwari 
# Date 1479128885 -19800
#  Mon Nov 14 18:38:05 2016 +0530
# Branch stable
# Node ID 8defd4e7b2e4875247e4ec95e0dd3b9630983526
# Parent  bdf273f9521784ceeda868222d415303a0bcf58b
pcs: update design to have 'm_achievedFps' for every PCS Instance

diff -r bdf273f95217 -r 8defd4e7b2e4 source/api-uhdkit.cpp
--- a/source/api-uhdkit.cpp Tue Nov 08 14:20:24 2016 +0530
+++ b/source/api-uhdkit.cpp Mon Nov 14 18:38:05 2016 +0530
@@ -206,8 +206,6 @@
 return -1;
 if (numEncoded > 0)
 {
-uhdkitEnc->m_achievedFps = numEncoded * 100.0 / 
(double)(endTime - startTime);
-uhdkitEnc->m_achievedFps = uhdkitEnc->m_achievedFps / 
uhdkitEnc->m_param->gops; // Achieved fps for each gop encoder
 uhdkitEnc->m_encodedFrameCount += numEncoded;
 controllerIndex = ((uhdkitEnc->m_encodedFrameCount - 1) / 
uhdkitEnc->m_param->x265Param->keyframeMax) % uhdkitEnc->m_param->gops;
 X265_CHECK(controllerIndex >= 0 && controllerIndex < 
uhdkitEnc->m_param->gops, "Invalid controllerIndex: %d, must be between 0 and 
%d\n", controllerIndex, uhdkitEnc->m_param->gops);
diff -r bdf273f95217 -r 8defd4e7b2e4 source/pcs/api-pcs.cpp
--- a/source/pcs/api-pcs.cpp Tue Nov 08 14:20:24 2016 +0530
+++ b/source/pcs/api-pcs.cpp Mon Nov 14 18:38:05 2016 +0530
@@ -211,6 +211,7 @@
 m_pcsParam->statusPrintInterval  = param->statusPrintInterval;
 m_curTimeStamp = m_lastTimeStamp = X265_NS::x265_mdate();
 m_framesWindow = 1;
+m_achievedFps = 0.0;
 m_outFrameCountOfLastAccumulatorReset = 0;
 time(&m_lastStatusOutputTime);
 
@@ -289,11 +290,11 @@
 int64_t elapsedEncTime = m_curTimeStamp - m_lastTimeStamp;
 int controllerIndex = ((uhdkitEnc->m_encodedFrameCount - 1) / 
uhdkitEnc->m_param->x265Param->keyframeMax) % uhdkitEnc->m_param->gops;
 X265_CHECK(controllerIndex >= 0 && controllerIndex < 
uhdkitEnc->m_param->gops, "Invalid controllerIndex: %d, must be between 0 and 
%d\n", controllerIndex, uhdkitEnc->m_param->gops);
-if (((m_bScenecut == 1) && elapsedEncTime > 0) || elapsedEncTime >= 
30 || uhdkitEnc->m_achievedFps < m_pcsParam->fpsSetPoint)
+if (((m_bScenecut == 1) && elapsedEncTime > 0) || elapsedEncTime >= 
30 || m_achievedFps < m_pcsParam->fpsSetPoint)
 {
 // Don't allow outrageously high frame rate measurements to skew 
the controller.
-uhdkitEnc->m_achievedFps = X265_MIN(uhdkitEnc->m_achievedFps, 4 * 
m_pcsParam->fpsSetPoint);
-error = (m_pcsParam->fpsSetPoint - uhdkitEnc->m_achievedFps) / 
m_pcsParam->fpsSetPoint;
+m_achievedFps = X265_MIN(m_achievedFps, 4 * 
m_pcsParam->fpsSetPoint);
+error = (m_pcsParam->fpsSetPoint - m_achievedFps) / 
m_pcsParam->fpsSetPoint;
 
 if (m_pcsParam->integralReset > 0)
 {
@@ -308,7 +309,7 @@
 {
 double lowerBound = (m_pcsParam->fpsSetPoint * 
SATURATION_RANGE_MIN) / 100.0;   /* Lower bound, 3% of set-point */
 double upperBound = (m_pcsParam->fpsSetPoint * 
SATURATION_RANGE_MAX) / 100.0;   /* Upper bound, 10% of set-point */
-double fpsDiff = (uhdkitEnc->m_achievedFps - 
m_pcsParam->fpsSetPoint);
+double fpsDiff =(m_achievedFps - m_pcsParam->fpsSetPoint);
 resetErrorAccumulater = (fpsDiff >= lowerBound && fpsDiff <= 
upperBound) || m_bScenecut; /* Steady state, or scenecut */
 }
 
@@ -388,7 +389,7 @@
 m_outFrameCountOfLastAccumulatorReset = uhdkitEnc->m_encodedFrameCount;
 m_lastTimeStamp = m_curTimeStamp;
 if (uhdkitEnc->m_reconfigParam->logLevel == UHDKIT_LOG_INFO)
-
uhdkit_pcs_printStatus(&uhdkitEnc->m_reconfigParam[controllerIndex], 
uhdkitEnc->m_achievedFps);
+
uhdkit_pcs_printStatus(&uhdkitEnc->m_reconfigParam[controllerIndex], 
m_achievedFps);
 }
 return true;
 }
@@ -398,6 +399,11 @@
 m_bScenecut = pic->frameData.bScenecut;
 }
 
+void pcs::uhdkit_pcs_update_fps(int64_t startTime, int64_t endTime, int 
numEncoded)
+{
+m_achievedFps = numEncoded * 100.0 / (double)(endTime - startTime);
+}
+
 int pcs::uhdkit_pcs_getControlParamValue(const x265_param *param, int index)
 {
 int controlParamValue[NUM_CONTROLLER] = { param->bEnableFastIntra, 
param->bEnableEarlySkip, param->bEnableRectInter,
diff -r bdf273f95217 -r 8defd4e7b2e4 source/pcs/pcs.h
--- a/source/pcs/pcs.h  Tue Nov 08 14:20:24 2016 +0530
+++ b/source/pcs/pcs.h  Mon Nov 14 18:38:05 2016 +0530
@@ -32,6 +32,7 @@
 /* variables handled by the PCS Instance */
 pcs_param*  m_pcsParam;
 pcs_controller* m_pcsController;
+double  m_achievedFps;
 int64_t m_curTimeStamp;
 int64_t m_lastTimeStamp;
 time_t  m_lastStatusOutputTime;
@@ -52,6 +53,9 @@
 /* This function updates the scenecut information */
 void uhdkit_pc

Re: [x265] [PATCH 1 of 9] pcs: update design to have 'm_achievedFps' for every PCS Instance

2016-11-17 Thread Praveen Tiwari
Please, ignore this patch. Thanks.


On Thu, Nov 17, 2016 at 8:51 PM,  wrote:

> # HG changeset patch
> # User Praveen Tiwari 
> # Date 1479128885 -19800
> #  Mon Nov 14 18:38:05 2016 +0530
> # Branch stable
> # Node ID 8defd4e7b2e4875247e4ec95e0dd3b9630983526
> # Parent  bdf273f9521784ceeda868222d415303a0bcf58b
> pcs: update design to have 'm_achievedFps' for every PCS Instance
>
> diff -r bdf273f95217 -r 8defd4e7b2e4 source/api-uhdkit.cpp
> --- a/source/api-uhdkit.cpp Tue Nov 08 14:20:24 2016 +0530
> +++ b/source/api-uhdkit.cpp Mon Nov 14 18:38:05 2016 +0530
> @@ -206,8 +206,6 @@
>  return -1;
>  if (numEncoded > 0)
>  {
> -uhdkitEnc->m_achievedFps = numEncoded * 100.0 /
> (double)(endTime - startTime);
> -uhdkitEnc->m_achievedFps = uhdkitEnc->m_achievedFps /
> uhdkitEnc->m_param->gops; // Achieved fps for each gop encoder
>  uhdkitEnc->m_encodedFrameCount += numEncoded;
>  controllerIndex = ((uhdkitEnc->m_encodedFrameCount - 1) /
> uhdkitEnc->m_param->x265Param->keyframeMax) % uhdkitEnc->m_param->gops;
>  X265_CHECK(controllerIndex >= 0 && controllerIndex <
> uhdkitEnc->m_param->gops, "Invalid controllerIndex: %d, must be between 0
> and %d\n", controllerIndex, uhdkitEnc->m_param->gops);
> diff -r bdf273f95217 -r 8defd4e7b2e4 source/pcs/api-pcs.cpp
> --- a/source/pcs/api-pcs.cpp Tue Nov 08 14:20:24 2016 +0530
> +++ b/source/pcs/api-pcs.cpp Mon Nov 14 18:38:05 2016 +0530
> @@ -211,6 +211,7 @@
>  m_pcsParam->statusPrintInterval  = param->statusPrintInterval;
>  m_curTimeStamp = m_lastTimeStamp = X265_NS::x265_mdate();
>  m_framesWindow = 1;
> +m_achievedFps = 0.0;
>  m_outFrameCountOfLastAccumulatorReset = 0;
>  time(&m_lastStatusOutputTime);
>
> @@ -289,11 +290,11 @@
>  int64_t elapsedEncTime = m_curTimeStamp - m_lastTimeStamp;
>  int controllerIndex = ((uhdkitEnc->m_encodedFrameCount - 1) /
> uhdkitEnc->m_param->x265Param->keyframeMax) % uhdkitEnc->m_param->gops;
>  X265_CHECK(controllerIndex >= 0 && controllerIndex <
> uhdkitEnc->m_param->gops, "Invalid controllerIndex: %d, must be between 0
> and %d\n", controllerIndex, uhdkitEnc->m_param->gops);
> -if (((m_bScenecut == 1) && elapsedEncTime > 0) || elapsedEncTime
> >= 30 || uhdkitEnc->m_achievedFps < m_pcsParam->fpsSetPoint)
> +if (((m_bScenecut == 1) && elapsedEncTime > 0) || elapsedEncTime
> >= 30 || m_achievedFps < m_pcsParam->fpsSetPoint)
>  {
>  // Don't allow outrageously high frame rate measurements to
> skew the controller.
> -uhdkitEnc->m_achievedFps = X265_MIN(uhdkitEnc->m_achievedFps,
> 4 * m_pcsParam->fpsSetPoint);
> -error = (m_pcsParam->fpsSetPoint - uhdkitEnc->m_achievedFps)
> / m_pcsParam->fpsSetPoint;
> +m_achievedFps = X265_MIN(m_achievedFps, 4 *
> m_pcsParam->fpsSetPoint);
> +error = (m_pcsParam->fpsSetPoint - m_achievedFps) /
> m_pcsParam->fpsSetPoint;
>
>  if (m_pcsParam->integralReset > 0)
>  {
> @@ -308,7 +309,7 @@
>  {
>  double lowerBound = (m_pcsParam->fpsSetPoint *
> SATURATION_RANGE_MIN) / 100.0;   /* Lower bound, 3% of set-point */
>  double upperBound = (m_pcsParam->fpsSetPoint *
> SATURATION_RANGE_MAX) / 100.0;   /* Upper bound, 10% of set-point */
> -double fpsDiff = (uhdkitEnc->m_achievedFps -
> m_pcsParam->fpsSetPoint);
> +double fpsDiff =(m_achievedFps -
> m_pcsParam->fpsSetPoint);
>  resetErrorAccumulater = (fpsDiff >= lowerBound && fpsDiff
> <= upperBound) || m_bScenecut; /* Steady state, or scenecut */
>  }
>
> @@ -388,7 +389,7 @@
>  m_outFrameCountOfLastAccumulatorReset = uhdkitEnc->m_
> encodedFrameCount;
>  m_lastTimeStamp = m_curTimeStamp;
>  if (uhdkitEnc->m_reconfigParam->logLevel == UHDKIT_LOG_INFO)
> -
> uhdkit_pcs_printStatus(&uhdkitEnc->m_reconfigParam[controllerIndex],
> uhdkitEnc->m_achievedFps);
> +
> uhdkit_pcs_printStatus(&uhdkitEnc->m_reconfigParam[controllerIndex],
> m_achievedFps);
>  }
>  return true;
>  }
> @@ -398,6 +399,11 @@
>  m_bScenecut = pic->frameData.bScenecut;
>  }
>
> +void pcs::uhdkit_pcs_update_fps(int64_t startTime, int64_t endTime, int
> numEncoded)
> +{
> +m_achievedFps = numEncoded * 100.0 / (double)(endTime -
> startTime);
> +}
> +
>  int pcs::uhdkit_pcs_getControlParamValue(const x265_param *param, int
> index)
>  {
>  int controlParamValue[NUM_CONTROLLER] = { param->bEnableFastIntra,
> param->bEnableEarlySkip, param->bEnableRectInter,
> diff -r bdf273f95217 -r 8defd4e7b2e4 source/pcs/pcs.h
> --- a/source/pcs/pcs.h  Tue Nov 08 14:20:24 2016 +0530
> +++ b/source/pcs/pcs.h  Mon Nov 14 18:38:05 2016 +0530
> @@ -32,6 +32,7 @@
>  /* variables handled by the PCS Instance */
>  pcs_param*  m_pcsParam;
>  pcs_contro

[x265] [PATCH] encoder.cpp: print reconfigure params for debug purpose

2016-11-17 Thread praveen
# HG changeset patch
# User Praveen Tiwari 
# Date 1479392046 -19800
#  Thu Nov 17 19:44:06 2016 +0530
# Node ID 64dc12e9aae9acacaaab5f7875d01fb09d4156d6
# Parent  4c1652f3884fba9fab4c589dd057b12e6bf33d5b
encoder.cpp: print reconfigure params for debug purpose

diff -r 4c1652f3884f -r 64dc12e9aae9 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Tue Nov 15 11:16:04 2016 +0530
+++ b/source/encoder/encoder.cpp Thu Nov 17 19:44:06 2016 +0530
@@ -2433,10 +2433,10 @@
 x265_param* oldParam = m_param;
 x265_param* newParam = m_latestParam;
 
-x265_log(newParam, X265_LOG_INFO, "Reconfigured param options, input 
Frame: %d\n", m_pocLast + 1);
+x265_log(newParam, X265_LOG_DEBUG, "Reconfigured param options, input 
Frame: %d\n", m_pocLast + 1);
 
 char tmp[40];
-#define TOOLCMP(COND1, COND2, STR)  if (COND1 != COND2) { sprintf(tmp, STR, 
COND1, COND2); x265_log(newParam, X265_LOG_INFO, tmp); }
+#define TOOLCMP(COND1, COND2, STR)  if (COND1 != COND2) { sprintf(tmp, STR, 
COND1, COND2); x265_log(newParam, X265_LOG_DEBUG, tmp); }
 TOOLCMP(oldParam->maxNumReferences, newParam->maxNumReferences, "ref=%d to 
%d\n");
 TOOLCMP(oldParam->bEnableFastIntra, newParam->bEnableFastIntra, 
"fast-intra=%d to %d\n");
 TOOLCMP(oldParam->bEnableEarlySkip, newParam->bEnableEarlySkip, 
"early-skip=%d to %d\n");
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 2 of 4] limitTU : use spatial and temporal CUs' TU depth to limit recursion

2016-11-17 Thread bhavna
# HG changeset patch
# User Bhavna Hariharan 
# Date 1479365378 -19800
#  Thu Nov 17 12:19:38 2016 +0530
# Node ID 07a4e4d785a69f719922129ca5997b12552bb4ab
# Parent  da1c770fa6e905fe341705b3f95a201a1a31fcf9
limitTU : use spatial and temporal CUs' TU depth to limit recursion

diff -r da1c770fa6e9 -r 07a4e4d785a6 source/common/cudata.cpp
--- a/source/common/cudata.cpp  Tue Nov 15 11:34:06 2016 +0530
+++ b/source/common/cudata.cpp  Thu Nov 17 12:19:38 2016 +0530
@@ -295,6 +295,9 @@
 
 /* initialize the remaining CU data in one memset */
 memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? 
BytesPerPartition - 11 : BytesPerPartition - 7) * m_numPartitions);
+
+for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
+m_refTuDepth[i] = -1;
 
 uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
 m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : 
NULL;
diff -r da1c770fa6e9 -r 07a4e4d785a6 source/common/cudata.h
--- a/source/common/cudata.h Tue Nov 15 11:34:06 2016 +0530
+++ b/source/common/cudata.h Thu Nov 17 12:19:38 2016 +0530
@@ -28,6 +28,8 @@
 #include "slice.h"
 #include "mv.h"
 
+#define NUM_TU_DEPTH 21
+
 namespace X265_NS {
 // private namespace
 
@@ -204,6 +206,7 @@
 enum { BytesPerPartition = 21 };  // combined sizeof() of all per-part data
 
 coeff_t*  m_trCoeff[3];   // transformed coefficient buffer per 
plane
+int8_t    m_refTuDepth[NUM_TU_DEPTH];   // TU depth of CU at depths 0, 
1 and 2
 
 MV*   m_mv[2];// array of motion vectors per list
 MV*   m_mvd[2];   // array of coded motion vector deltas 
per list
diff -r da1c770fa6e9 -r 07a4e4d785a6 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Tue Nov 15 11:34:06 2016 +0530
+++ b/source/encoder/analysis.cpp   Thu Nov 17 12:19:38 2016 +0530
@@ -203,6 +203,57 @@
 return *m_modeDepth[0].bestMode;
 }
 
+int32_t Analysis::loadTUDepth(CUGeom cuGeom, CUData parentCTU)
+{
+float predDepth = 0;
+CUData* neighbourCU;
+uint8_t count = 0;
+int32_t maxTUDepth = -1;
+neighbourCU = m_slice->m_refFrameList[0][0]->m_encData->m_picCTU;
+predDepth += neighbourCU->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+if (m_slice->isInterB())
+{
+neighbourCU = m_slice->m_refFrameList[1][0]->m_encData->m_picCTU;
+predDepth += neighbourCU->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+}
+if (parentCTU.m_cuAbove)
+{
+predDepth += parentCTU.m_cuAbove->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+if (parentCTU.m_cuAboveLeft)
+{
+predDepth += 
parentCTU.m_cuAboveLeft->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+}
+if (parentCTU.m_cuAboveRight)
+{
+predDepth += 
parentCTU.m_cuAboveRight->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+}
+}
+if (parentCTU.m_cuLeft)
+{
+predDepth += parentCTU.m_cuLeft->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+}
+predDepth /= count;
+
+if (predDepth == 0)
+maxTUDepth = 0;
+else if (predDepth < 1)
+maxTUDepth = 1;
+else if (predDepth >= 1 && predDepth <= 1.5)
+maxTUDepth = 2;
+else if (predDepth > 1.5 && predDepth <= 2.5)
+maxTUDepth = 3;
+else
+maxTUDepth = -1;
+
+return maxTUDepth;
+}
+
 void Analysis::tryLossless(const CUGeom& cuGeom)
 {
 ModeDepth& md = m_modeDepth[cuGeom.depth];
@@ -326,6 +377,15 @@
 checkBestMode(md.pred[PRED_INTRA_NxN], depth);
 }
 
+if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+{
+CUData* ctu = 
md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
+int8_t maxTUDepth = -1;
+for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
+maxTUDepth = X265_MAX(maxTUDepth, 
md.bestMode->cu.m_tuDepth[i]);
+ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
+}
+
 if (m_bTryLossless)
 tryLossless(cuGeom);
 
@@ -894,6 +954,9 @@
 bool skipRectAmp = false;
 bool chooseMerge = false;
 
+if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
+
 SplitData splitData[4];
 splitData[0].initSplitCUData();
 splitData[1].initSplitCUData();
@@ -1400,6 +1463,17 @@
 if (m_param->rdLevel)
 md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr, 
cuGeom.absPartIdx);
 
+if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+{
+if (mightNotSplit)
+{
+CUData* ctu = 
md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
+int8_t maxTUDepth = -1;
+for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
+maxTUDepth = X265_MAX(maxTUDepth, 
md.bestMode->cu.m_tuDepth[i]);
+ctu->m_refTuDepth[cuGeom.geo

[x265] [PATCH 3 of 4] limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth

2016-11-17 Thread bhavna
# HG changeset patch
# User Bhavna Hariharan 
# Date 1479380469 -19800
#  Thu Nov 17 16:31:09 2016 +0530
# Node ID f5d3a5aedcdf20235ec76dbf9aba516da83e8dd1
# Parent  07a4e4d785a69f719922129ca5997b12552bb4ab
limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth

diff -r 07a4e4d785a6 -r f5d3a5aedcdf source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Thu Nov 17 12:19:38 2016 +0530
+++ b/source/encoder/analysis.cpp   Thu Nov 17 16:31:09 2016 +0530
@@ -377,7 +377,7 @@
 checkBestMode(md.pred[PRED_INTRA_NxN], depth);
 }
 
-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 {
 CUData* ctu = 
md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
 int8_t maxTUDepth = -1;
@@ -954,7 +954,7 @@
 bool skipRectAmp = false;
 bool chooseMerge = false;
 
-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
 
 SplitData splitData[4];
@@ -1463,7 +1463,7 @@
 if (m_param->rdLevel)
 md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr, 
cuGeom.absPartIdx);
 
-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 {
 if (mightNotSplit)
 {
@@ -1498,7 +1498,7 @@
 md.pred[PRED_2Nx2N].rdCost = 0;
 }
 
-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 m_maxTUDepth =  loadTUDepth(cuGeom, parentCTU);
 
 SplitData splitData[4];
@@ -1827,7 +1827,7 @@
 if (mightSplit)
 addSplitFlagCost(*md.bestMode, cuGeom.depth);
 }
-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 {
 if (mightNotSplit)
 {
diff -r 07a4e4d785a6 -r f5d3a5aedcdf source/encoder/search.cpp
--- a/source/encoder/search.cpp Thu Nov 17 12:19:38 2016 +0530
+++ b/source/encoder/search.cpp Thu Nov 17 16:31:09 2016 +0530
@@ -2625,7 +2625,7 @@
 
 uint32_t tuDepthRange[2];
 cu.getInterTUQtDepthRange(tuDepthRange, 0);
-if (limitTU == X265_TU_LIMIT_NEIGH)
+if (limitTU & X265_TU_LIMIT_NEIGH)
 {
 int maxLog2CUSize = (int)g_log2Size[m_param->maxCUSize];
 m_maxTUDepth = x265_clip3(maxLog2CUSize - (int32_t)tuDepthRange[1], 
maxLog2CUSize - (int32_t)tuDepthRange[0], m_maxTUDepth);
@@ -2639,7 +2639,12 @@
 memset(&m_cacheTU, 0, sizeof(TUInfoCache));
 
 Cost costs;
-estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
+if (limitTU == X265_TU_LIMIT_DFS_NEIGH)
+{
+int32_t tempDepth = m_maxTUDepth;
+m_maxTUDepth = tempDepth;
+}
+estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, 
tuDepthRange);
 
 uint32_t tqBypass = cu.m_tqBypass[0];
 if (!tqBypass)
@@ -2898,10 +2903,11 @@
 uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
 for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx 
+= qNumParts)
 {
-if (limitTU == X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
+if (limitTU & X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
 {
+m_maxTUDepth = cu.m_tuDepth[0];
 // Fetch maximum TU depth of first sub partition to limit 
recursion of others
-for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++)
+for (uint32_t i = 1; i < cuGeom.numPartitions / 4; i++)
 m_maxTUDepth = X265_MAX(m_maxTUDepth, cu.m_tuDepth[i]);
 }
 estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv, 
splitCost, depthRange, splitMore);
@@ -2968,7 +2974,7 @@
 }
 }
 }
-else if (limitTU == X265_TU_LIMIT_DFS || limitTU == X265_TU_LIMIT_NEIGH)
+else if (limitTU & X265_TU_LIMIT_DFS_NEIGH)
 {
 if (bCheckSplit && m_maxTUDepth >= 0)
 {
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 4 of 4] tests: update command lines to cover limitTU 3 and 4

2016-11-17 Thread bhavna
# HG changeset patch
# User Bhavna Hariharan 
# Date 1479302428 -19800
#  Wed Nov 16 18:50:28 2016 +0530
# Node ID dfb89e666e1aa13ee13b896c6f1967d5084907dd
# Parent  f5d3a5aedcdf20235ec76dbf9aba516da83e8dd1
tests:  update command lines to cover limitTU 3 and 4

diff -r f5d3a5aedcdf -r dfb89e666e1a source/test/regression-tests.txt
--- a/source/test/regression-tests.txt  Thu Nov 17 16:31:09 2016 +0530
+++ b/source/test/regression-tests.txt  Wed Nov 16 18:50:28 2016 +0530
@@ -19,10 +19,10 @@
 BasketballDrive_1920x1080_50.y4m,--preset medium --keyint -1 --nr-inter 100 
-F4 --no-sao
 BasketballDrive_1920x1080_50.y4m,--preset medium --no-cutree 
--analysis-mode=save --bitrate 7000 --limit-modes,--preset medium --no-cutree 
--analysis-mode=load --bitrate 7000 --limit-modes
 BasketballDrive_1920x1080_50.y4m,--preset slow --nr-intra 100 -F4 
--aq-strength 3 --qg-size 16 --limit-refs 1
-BasketballDrive_1920x1080_50.y4m,--preset slower --lossless --chromaloc 3 
--subme 0
+BasketballDrive_1920x1080_50.y4m,--preset slower --lossless --chromaloc 3 
--subme 0 --limit-tu 4
 BasketballDrive_1920x1080_50.y4m,--preset slower --no-cutree 
--analysis-mode=save --bitrate 7000,--preset slower --no-cutree 
--analysis-mode=load --bitrate 7000
-BasketballDrive_1920x1080_50.y4m,--preset veryslow --crf 4 --cu-lossless 
--pmode --limit-refs 1 --aq-mode 3 --limit-tu 1
-BasketballDrive_1920x1080_50.y4m,--preset veryslow --no-cutree 
--analysis-mode=save --bitrate 7000 --tskip-fast,--preset veryslow --no-cutree 
--analysis-mode=load --bitrate 7000  --tskip-fast
+BasketballDrive_1920x1080_50.y4m,--preset veryslow --crf 4 --cu-lossless 
--pmode --limit-refs 1 --aq-mode 3 --limit-tu 3
+BasketballDrive_1920x1080_50.y4m,--preset veryslow --no-cutree 
--analysis-mode=save --bitrate 7000 --tskip-fast --limit-tu 4,--preset veryslow 
--no-cutree --analysis-mode=load --bitrate 7000  --tskip-fast --limit-tu 4
 BasketballDrive_1920x1080_50.y4m,--preset veryslow --recon-y4m-exec "ffplay -i 
pipe:0 -autoexit"
 Coastguard-4k.y4m,--preset ultrafast --recon-y4m-exec "ffplay -i pipe:0 
-autoexit"
 Coastguard-4k.y4m,--preset superfast --tune grain --overscan=crop
@@ -47,7 +47,7 @@
 DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset slow --temporal-layers 
--no-psy-rd --qg-size 32 --limit-refs 0 --cu-lossless
 DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset veryfast --weightp --nr-intra 
1000 -F4
 DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset medium --nr-inter 500 -F4 
--no-psy-rdoq
-DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset slower --no-weightp 
--rdoq-level 0 --limit-refs 3
+DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset slower --no-weightp 
--rdoq-level 0 --limit-refs 3 --tu-inter-depth 4 --limit-tu 3
 DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset fast --no-cutree 
--analysis-mode=save --bitrate 3000 --early-skip --tu-inter-depth 3 --limit-tu 
1,--preset fast --no-cutree --analysis-mode=load --bitrate 3000 --early-skip 
--tu-inter-depth 3 --limit-tu 1
 FourPeople_1280x720_60.y4m,--preset superfast --no-wpp --lookahead-slices 2
 FourPeople_1280x720_60.y4m,--preset veryfast --aq-mode 2 --aq-strength 1.5 
--qg-size 8
@@ -80,7 +80,7 @@
 RaceHorses_416x240_30.y4m,--preset superfast --no-cutree
 RaceHorses_416x240_30.y4m,--preset medium --tskip-fast --tskip
 RaceHorses_416x240_30.y4m,--preset slower --keyint -1 --rdoq-level 0 
--limit-tu 2
-RaceHorses_416x240_30.y4m,--preset veryslow --tskip-fast --tskip --limit-refs 
3 --limit-tu 2
+RaceHorses_416x240_30.y4m,--preset veryslow --tskip-fast --tskip --limit-refs 
3 --limit-tu 3
 RaceHorses_416x240_30_10bit.yuv,--preset ultrafast --tune psnr --limit-refs 1
 RaceHorses_416x240_30_10bit.yuv,--preset veryfast --weightb
 RaceHorses_416x240_30_10bit.yuv,--preset faster --rdoq-level 0 --dither
@@ -110,7 +110,7 @@
 ducks_take_off_420_720p50.y4m,--preset veryslow --constrained-intra --bframes 2
 mobile_calendar_422_ntsc.y4m,--preset superfast --weightp
 mobile_calendar_422_ntsc.y4m,--preset medium --bitrate 500 -F4
-mobile_calendar_422_ntsc.y4m,--preset slower --tskip --tskip-fast --limit-tu 2
+mobile_calendar_422_ntsc.y4m,--preset slower --tskip --tskip-fast --limit-tu 4
 mobile_calendar_422_ntsc.y4m,--preset veryslow --tskip --limit-refs 2
 old_town_cross_444_720p50.y4m,--preset ultrafast --weightp --min-cu 32
 old_town_cross_444_720p50.y4m,--preset superfast --weightp --min-cu 16 
--limit-modes
@@ -120,7 +120,7 @@
 old_town_cross_444_720p50.y4m,--preset medium --keyint -1 --no-weightp --ref 6
 old_town_cross_444_720p50.y4m,--preset slow --rdoq-level 1 --early-skip --ref 
7 --no-b-pyramid
 old_town_cross_444_720p50.y4m,--preset slower --crf 4 --cu-lossless
-old_town_cross_444_720p50.y4m,--preset veryslow --max-tu-size 4 --min-cu-size 
32 --limit-tu 1
+old_town_cross_444_720p50.y4m,--preset veryslow --max-tu-size 4 --min-cu-size 
32 --limit-tu 4
 parkrun_ter_720p50.y4m,--preset medium --no-open-gop --sao-non-deblock --crf 4 
--cu-lossless
 parkrun_ter_720p50.y4m,--preset slower --fast-intra --no-rect --

[x265] [PATCH 1 of 4] cli: add option to support limitTU 3 and 4

2016-11-17 Thread bhavna
# HG changeset patch
# User Bhavna Hariharan 
# Date 1479189846 -19800
#  Tue Nov 15 11:34:06 2016 +0530
# Node ID da1c770fa6e905fe341705b3f95a201a1a31fcf9
# Parent  4c1652f3884fba9fab4c589dd057b12e6bf33d5b
cli: add option to support limitTU 3 and 4

diff -r 4c1652f3884f -r da1c770fa6e9 doc/reST/cli.rst
--- a/doc/reST/cli.rst  Tue Nov 15 11:16:04 2016 +0530
+++ b/doc/reST/cli.rst  Tue Nov 15 11:34:06 2016 +0530
@@ -869,13 +869,18 @@
partitions, in which case a TU split is implied and thus the
residual quad-tree begins one layer below the CU quad-tree.
 
-.. option:: --limit-tu <0|1|2>
+.. option:: --limit-tu <0..4>
 
Enables early exit from TU depth recursion, for inter coded blocks.
Level 1 - decides to recurse to next higher depth based on cost 
comparison of full size TU and split TU.
Level 2 - based on first split subTU's depth, limits recursion of
other split subTUs.
+   Level 3 - based on the average TU depth of the co-located and the neighbor
+   CUs, limits recursion of the current CU.
+   Level 4 - uses the TU depth of the neighbouring/co-located CUs
+   to limit the 1st subTU depth. The 1st subTU depth is taken as the
+   limiting depth for the other subTUs.
 
Default: 0
 
diff -r 4c1652f3884f -r da1c770fa6e9 source/common/param.cpp
--- a/source/common/param.cpp   Tue Nov 15 11:16:04 2016 +0530
+++ b/source/common/param.cpp   Tue Nov 15 11:34:06 2016 +0530
@@ -1126,7 +1126,7 @@
   "QuadtreeTUMaxDepthInter must be less than or equal to the 
difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1");
 CHECK((param->maxTUSize != 32 && param->maxTUSize != 16 && 
param->maxTUSize != 8 && param->maxTUSize != 4),
   "max TU size must be 4, 8, 16, or 32");
-CHECK(param->limitTU > 2, "Invalid limit-tu option, limit-TU must be 0, 1 
or 2");
+CHECK(param->limitTU > 4, "Invalid limit-tu option, limit-TU must be 
between 0 and 4");
 CHECK(param->maxNumMergeCand < 1, "MaxNumMergeCand must be 1 or greater.");
 CHECK(param->maxNumMergeCand > 5, "MaxNumMergeCand must be 5 or smaller.");
 
diff -r 4c1652f3884f -r da1c770fa6e9 source/encoder/search.cpp
--- a/source/encoder/search.cpp Tue Nov 15 11:16:04 2016 +0530
+++ b/source/encoder/search.cpp Tue Nov 15 11:34:06 2016 +0530
@@ -94,6 +94,13 @@
 uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
 uint32_t numPartitions = 1 << (maxLog2CUSize - LOG2_UNIT_SIZE) * 2;
 
+if (m_param->limitTU <= 2)
+limitTU = m_param->limitTU;
+else if (m_param->limitTU == 3)
+limitTU = X265_TU_LIMIT_NEIGH;
+else
+limitTU = X265_TU_LIMIT_DFS_NEIGH;
+
 /* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 
2=16x16, 3=32x32
  * the coeffRQT and reconQtYuv are allocated to the max CU size at every 
depth. The parts
  * which are reconstructed at each depth are valid. At the end, the 
transform depth table
diff -r 4c1652f3884f -r da1c770fa6e9 source/encoder/search.h
--- a/source/encoder/search.h   Tue Nov 15 11:16:04 2016 +0530
+++ b/source/encoder/search.h   Tue Nov 15 11:34:06 2016 +0530
@@ -277,6 +277,7 @@
 uint32_t m_numLayers;
 uint32_t m_refLagPixels;
 uint32_t m_maxTUDepth;
+uint16_t limitTU;
 
 int16_t m_sliceMaxY;
 int16_t m_sliceMinY;
diff -r 4c1652f3884f -r da1c770fa6e9 source/x265.h
--- a/source/x265.h Tue Nov 15 11:16:04 2016 +0530
+++ b/source/x265.h Tue Nov 15 11:34:06 2016 +0530
@@ -357,6 +357,8 @@
 
 #define X265_TU_LIMIT_BFS   1
 #define X265_TU_LIMIT_DFS   2
+#define X265_TU_LIMIT_NEIGH 4
+#define X265_TU_LIMIT_DFS_NEIGH 6
 
 #define X265_BFRAME_MAX 16
 #define X265_MAX_FRAME_THREADS  16
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel