Re: [x265] [PATCH] Clean up dynamic refinement

2018-05-25 Thread Bhavna Hariharan
Please ignore this patch, I will resend it with some changes.


Thanks,

Bhavna Hariharan

On Tue, May 22, 2018 at 2:14 PM, <bha...@multicorewareinc.com> wrote:

> # HG changeset patch
> # User Bhavna Hariharan <bha...@multicorewareinc.com>
> # Date 1526964471 -19800
> #  Tue May 22 10:17:51 2018 +0530
> # Node ID 5587d9a25248075edadf94e1a78f6e11d091f651
> # Parent  cc2c5e46f3c87d27e3602af30b06ba6a0fbe2704
> Clean up dynamic refinement
>
> This patch does the following:
> 1) Earlier, locks were used to avoid the possibility of race conditions
> while
> copying data from CTU level to frame level. Now, the data is collected for
> each
> row and when the entire frame completes analysis the row data is copied to
> the
> frame. This method eliminates the possibility of a race condition without
> having to employ locks.
> 2) Allocate memory for the CTU information from the data pool, this will
> avoid
> fragmentation of data.
>
> diff -r cc2c5e46f3c8 -r 5587d9a25248 source/common/common.h
> --- a/source/common/common.h  Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/common.h  Tue May 22 10:17:51 2018 +0530
> @@ -332,6 +332,8 @@
>  #define START_CODE_OVERHEAD 3
>  #define FILLER_OVERHEAD (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1)
>
> +#define MAX_NUM_DYN_REFINE  ((NUM_CU_DEPTH - 1) *
> X265_REFINE_INTER_LEVELS)
> +
>  namespace X265_NS {
>
>  enum { SAO_NUM_OFFSET = 4 };
> diff -r cc2c5e46f3c8 -r 5587d9a25248 source/common/cudata.cpp
> --- a/source/common/cudata.cpp  Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/cudata.cpp  Tue May 22 10:17:51 2018 +0530
> @@ -274,6 +274,9 @@
>  for (int i = 0; i < 3; i++)
>  m_fAc_den[i] = m_fDc_den[i] = 0;
>  }
> +m_collectCURd = dataPool.dynRefineRdBlock + (instance *
> MAX_NUM_DYN_REFINE);
> +m_collectCUVariance = dataPool.dynRefVarBlock + (instance *
> MAX_NUM_DYN_REFINE);
> +m_collectCUCount = dataPool.dynRefCntBlock + (instance *
> MAX_NUM_DYN_REFINE);
>  }
>
>  void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp,
> uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCuInSlice)
> @@ -318,15 +321,9 @@
>  m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU -
> 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
>  memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
>
> -if (m_encData->m_param->bDynamicRefine)
> -{
> -int size = m_encData->m_param->maxCUDepth *
> X265_REFINE_INTER_LEVELS;
> -CHECKED_MALLOC_ZERO(m_collectCURd, uint64_t, size);
> -CHECKED_MALLOC_ZERO(m_collectCUVariance, uint32_t, size);
> -CHECKED_MALLOC_ZERO(m_collectCUCount, uint32_t, size);
> -}
> -fail:
> -return;
> +memset(m_collectCURd, 0, MAX_NUM_DYN_REFINE * sizeof(uint64_t));
> +memset(m_collectCUVariance, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t));
> +memset(m_collectCUCount, 0, MAX_NUM_DYN_REFINE * sizeof(uint32_t));
>  }
>
>  // initialize Sub partition
> diff -r cc2c5e46f3c8 -r 5587d9a25248 source/common/cudata.h
> --- a/source/common/cudata.h  Mon May 21 18:42:29 2018 +0530
> +++ b/source/common/cudata.h  Tue May 22 10:17:51 2018 +0530
> @@ -353,8 +353,12 @@
>  coeff_t* trCoeffMemBlock;
>  MV*  mvMemBlock;
>  sse_t*   distortionMemBlock;
> +uint64_t* dynRefineRdBlock;
> +uint32_t* dynRefCntBlock;
> +uint32_t* dynRefVarBlock;
>
> -CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL;
> mvMemBlock = NULL; distortionMemBlock = NULL; }
> +CUDataMemPool() { charMemBlock = NULL; trCoeffMemBlock = NULL;
> mvMemBlock = NULL; distortionMemBlock = NULL;
> +  dynRefineRdBlock = NULL; dynRefCntBlock = NULL;
> dynRefVarBlock = NULL;}
>
>  bool create(uint32_t depth, uint32_t csp, uint32_t numInstances,
> const x265_param& param)
>  {
> @@ -373,6 +377,9 @@
>  CHECKED_MALLOC(charMemBlock, uint8_t, numPartition * numInstances
> * CUData::BytesPerPartition);
>  CHECKED_MALLOC_ZERO(mvMemBlock, MV, numPartition * 4 *
> numInstances);
>  CHECKED_MALLOC(distortionMemBlock, sse_t, numPartition *
> numInstances);
> +CHECKED_MALLOC_ZERO(dynRefineRdBlock, uint64_t,
> MAX_NUM_DYN_REFINE * numInstances);
> +CHECKED_MALLOC_ZERO(dynRefCntBlock, uint32_t, MAX_NUM_DYN_REFINE
> * numInstances);
> +CHECKED_MALLOC_ZERO(dynRefVarBlock, uint32_t, MAX_NUM_DYN_REFINE
> * numInstances);
>  return true;
>  fail:
>  return false;
> @@ -384,6 +391,9 @@
>  X265_FREE(mvMemBlock);
>  X265_FREE(charMemBlock);
> 

Re: [x265] [PATCH] scaling: allow refinement of inter/intra frames when scalefactor is zero

2018-04-26 Thread Bhavna Hariharan
On Thu, Apr 26, 2018 at 12:41 PM,  wrote:

> # HG changeset patch
> # User Kavitha Sampath 
> # Date 1524568273 -19800
> #  Tue Apr 24 16:41:13 2018 +0530
> # Node ID 860c3b32f59e1883b77064b9948e8e17decd4641
> # Parent  07defe235cde1949c55464952ee1dbdc10baf5e4
> scaling: allow refinement of inter/intra frames when scalefactor is zero
>
> Also disable analysis of CU with min-cu size when scale factor is zero
>
> diff -r 07defe235cde -r 860c3b32f59e source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp   Thu Apr 12 16:57:19 2018 +0530
> +++ b/source/encoder/analysis.cpp   Tue Apr 24 16:41:13 2018 +0530
> @@ -523,7 +523,7 @@
>  int split = 0;
>  if (m_param->intraRefine && m_param->intraRefine != 4)
>  {
> -split = ((cuGeom.log2CUSize == 
> (uint32_t)(g_log2Size[m_param->minCUSize]
> + 1)) && bDecidedDepth);
> +split = m_param->scaleFactor && ((cuGeom.log2CUSize ==
> (uint32_t)(g_log2Size[m_param->minCUSize] + 1)) && bDecidedDepth);
>  if (cuGeom.log2CUSize == (uint32_t)(g_log2Size[m_param->minCUSize])
> && !bDecidedDepth)
>  bAlreadyDecided = false;
>  }
> @@ -2420,7 +2420,7 @@
>  m_refineLevel = m_param->interRefine;
>  else
>  m_refineLevel = m_frame->m_classifyFrame ? 1 : 3;
> -int split = (m_refineLevel && cuGeom.log2CUSize ==
> (uint32_t)(g_log2Size[m_param->minCUSize] + 1) && bDecidedDepth);
> +int split = (m_param->scaleFactor && m_refineLevel &&
> cuGeom.log2CUSize == (uint32_t)(g_log2Size[m_param->minCUSize] + 1) &&
> bDecidedDepth);
>  td.split = split;
>
>  if (bDecidedDepth)
> @@ -2494,7 +2494,7 @@
>  mode.cu.m_mvd[list][pu.puAbsPartIdx] =
> mode.cu.m_mv[list][pu.puAbsPartIdx] - mode.amvpCand[list][ref][mode.
> cu.m_mvpIdx[list][pu.puAbsPartIdx]]/*mvp*/;
>  }
>  }
> -else if(m_param->scaleFactor)
> +else
>  {
>  MVField candMvField[MRG_MAX_NUM_CANDS][2]; //
> double length for mv of both lists
>  uint8_t candDir[MRG_MAX_NUM_CANDS];
> diff -r 07defe235cde -r 860c3b32f59e source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp  Thu Apr 12 16:57:19 2018 +0530
> +++ b/source/encoder/encoder.cpp  Tue Apr 24 16:41:13 2018 +0530
> @@ -2694,7 +2694,7 @@
>
>  if (p->intraRefine)
>  {
> -if (!p->analysisLoad || p->analysisReuseLevel < 10 ||
> !p->scaleFactor)
> +if (!p->analysisLoad || p->analysisReuseLevel < 10)
>  {
>  x265_log(p, X265_LOG_WARNING, "Intra refinement requires
> analysis load, analysis-reuse-level 10, scale factor. Disabling intra
> refine.\n");
>

All the warning messages still say that scale factor is required to enable
refinement methods; we can remove that.

I see that you've enabled inter-refine, intra-refine and refine-mv without
scale-factor, is there any reason for not enabling dynamic refinement
without scale factor?



>  p->intraRefine = 0;
> @@ -2703,7 +2703,7 @@
>
>  if (p->interRefine)
>  {
> -if (!p->analysisLoad || p->analysisReuseLevel < 10 ||
> !p->scaleFactor)
> +if (!p->analysisLoad || p->analysisReuseLevel < 10)
>  {
>  x265_log(p, X265_LOG_WARNING, "Inter refinement requires
> analysis load, analysis-reuse-level 10, scale factor. Disabling inter
> refine.\n");
>  p->interRefine = 0;
> @@ -2737,7 +2737,7 @@
>
>  if (p->mvRefine)
>  {
> -if (!p->analysisLoad || p->analysisReuseLevel < 10 ||
> !p->scaleFactor)
> +if (!p->analysisLoad || p->analysisReuseLevel < 10)
>  {
>  x265_log(p, X265_LOG_WARNING, "MV refinement requires
> analysis load, analysis-reuse-level 10, scale factor. Disabling MV
> refine.\n");
>  p->mvRefine = 0;
>
> ___
> x265-devel mailing list
> x265-devel@videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH x265] dynamic-refine: Remove lock while collecting CTU data

2018-04-12 Thread Bhavna Hariharan
Please ignore this patch.


Thanks,

Bhavna Hariharan

On Thu, Apr 12, 2018 at 3:52 PM, <bha...@multicorewareinc.com> wrote:

> # HG changeset patch
> # User Bhavna Hariharan <bha...@multicorewareinc.com>
> # Date 1523528451 -19800
> #  Thu Apr 12 15:50:51 2018 +0530
> # Node ID bff8e6d4407bebd8ff19be8323deacc16be4875c
> # Parent  04a337abd70de269cef7d9655365f3a3ebde02aa
> dynamic-refine: Remove lock while collecting CTU data
>
> Locks were used to avoid the possibility of race conditions while copying
> data from CTU level to frame level. Now, the data is collected for each
> row and
> when the entire frame completes analysis the row data is copied to the
> frame.
> This method eliminates the possibility of a race condition without having
> to
> employ locks.
>
> diff -r 04a337abd70d -r bff8e6d4407b source/common/common.h
> --- a/source/common/common.h  Thu Apr 12 15:10:59 2018 +0530
> +++ b/source/common/common.h  Thu Apr 12 15:50:51 2018 +0530
> @@ -332,6 +332,8 @@
>  #define START_CODE_OVERHEAD 3
>  #define FILLER_OVERHEAD (NAL_TYPE_OVERHEAD + START_CODE_OVERHEAD + 1)
>
> +#define MAX_NUM_DYN_REFINE  ((NUM_CU_DEPTH - 1) *
> X265_REFINE_INTER_LEVELS)
> +
>  namespace X265_NS {
>
>  enum { SAO_NUM_OFFSET = 4 };
> diff -r 04a337abd70d -r bff8e6d4407b source/common/framedata.h
> --- a/source/common/framedata.h Thu Apr 12 15:10:59 2018 +0530
> +++ b/source/common/framedata.h Thu Apr 12 15:50:51 2018 +0530
> @@ -88,6 +88,11 @@
>  uint64_tcntInterPu[NUM_CU_DEPTH][INTER_MODES - 1];
>  uint64_tcntMergePu[NUM_CU_DEPTH][INTER_MODES - 1];
>
> +/* Feature values per row for dynamic refinement */
> +uint64_t   rowRdDyn[MAX_NUM_DYN_REFINE];
> +uint32_t   rowVarDyn[MAX_NUM_DYN_REFINE];
> +uint32_t   rowCntDyn[MAX_NUM_DYN_REFINE];
> +
>  FrameStats()
>  {
>  memset(this, 0, sizeof(FrameStats));
> diff -r 04a337abd70d -r bff8e6d4407b source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp   Thu Apr 12 15:10:59 2018 +0530
> +++ b/source/encoder/frameencoder.cpp   Thu Apr 12 15:50:51 2018 +0530
> @@ -935,6 +935,9 @@
>  }
>  } // end of (m_param->maxSlices > 1)
>
> +if (m_param->bDynamicRefine && m_top->m_startPoint <=
> m_frame->m_encodeOrder) //Avoid collecting data that will not be used by
> future frames.
> +collectDynDataFrame();
> +
>  if (m_param->rc.bStatWrite)
>  {
>  int totalI = 0, totalP = 0, totalSkip = 0;
> @@ -1473,27 +1476,13 @@
>
>  // Does all the CU analysis, returns best top level mode decision
>  Mode& best = tld.analysis.compressCTU(*ctu, *m_frame,
> m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
> -if (m_param->bDynamicRefine)
> +
> +/* startPoint > encodeOrder is true when the start point changes
> for
> +a new GOP but all frames of the previous GOP is still incomplete
> +The data from these frames will not be used by any future frames.
> */
> +if (m_param->bDynamicRefine && m_top->m_startPoint <=
> m_frame->m_encodeOrder)
>  {
> -if (m_top->m_startPoint <= m_frame->m_encodeOrder) // Avoid
> collecting data that will not be used by future frames.
> -{
> -ScopedLock dynLock(m_top->m_dynamicRefineLock);
> -for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
> -{
> -for (uint32_t depth = 0; depth < m_param->maxCUDepth;
> depth++)
> -{
> -int offset = (depth * X265_REFINE_INTER_LEVELS) +
> i;
> -int curFrameIndex = m_frame->m_encodeOrder -
> m_top->m_startPoint;
> -int index = (curFrameIndex *
> X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
> -if (ctu->m_collectCUCount[offset])
> -{
> -m_top->m_variance[index] +=
> ctu->m_collectCUVariance[offset];
> -m_top->m_rdCost[index] +=
> ctu->m_collectCURd[offset];
> -m_top->m_trainingCount[index] +=
> ctu->m_collectCUCount[offset];
> -}
> -}
> -}
> -}
> +collectDynDataRow(*ctu, );
>  X265_FREE_ZERO(ctu->m_collectCUVariance);
>  X265_FREE_ZERO(ctu->m_collectCURd);
>  X265_FREE_ZERO(ctu->m_collectCUCount);
> @@ -1880,6 +1869,46 @@
>  if (ATOMIC_

Re: [x265] [PATCH x265] dynamic-refine: restart training at the beginning of each GOP

2018-04-02 Thread Bhavna Hariharan
On Mon, Apr 2, 2018 at 4:04 PM, Ashok Kumar Mishra <
as...@multicorewareinc.com> wrote:

>
>
> On Fri, Mar 30, 2018 at 5:26 PM, <bha...@multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Bhavna Hariharan <bha...@multicorewareinc.com>
>> # Date 1522408210 -19800
>> #  Fri Mar 30 16:40:10 2018 +0530
>> # Node ID ae72210ad6e846062572ef8a02970b74052c2f1c
>> # Parent  3440a56acc7865dcdea725b8ce7755450209a8ee
>> dynamic-refine: restart training at the beginning of each GOP
>>
>> Earlier, the training restarted at scenecuts. Now, the algorithm restarts
>> at
>> the beginning of each GOP in addition to the scenecuts.
>>
>> diff -r 3440a56acc78 -r ae72210ad6e8 source/encoder/encoder.cpp
>> --- a/source/encoder/encoder.cpp  Thu Mar 29 14:35:50 2018 +0530
>> +++ b/source/encoder/encoder.cpp  Fri Mar 30 16:40:10 2018 +0530
>> @@ -416,7 +416,8 @@
>>
>>  if (m_param->bDynamicRefine)
>>  {
>> -int size = m_param->totalFrames * m_param->maxCUDepth *
>> X265_REFINE_INTER_LEVELS;
>> +/* Allocate memory for 1 GOP and reuse it for the subsequent
>> GOPs */
>> +int size = (m_param->keyframeMax + m_param->lookaheadDepth) *
>> m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
>>  CHECKED_MALLOC_ZERO(m_variance, uint64_t, size);
>>  CHECKED_MALLOC_ZERO(m_rdCost, uint64_t, size);
>>  CHECKED_MALLOC_ZERO(m_trainingCount, uint32_t, size);
>> diff -r 3440a56acc78 -r ae72210ad6e8 source/encoder/frameencoder.cpp
>> --- a/source/encoder/frameencoder.cpp   Thu Mar 29 14:35:50 2018 +0530
>> +++ b/source/encoder/frameencoder.cpp   Fri Mar 30 16:40:10 2018 +0530
>> @@ -1463,6 +1463,7 @@
>>
>>  if (m_param->bDynamicRefine)
>>  {
>> +if (m_top->m_startPoint <= m_frame->m_encodeOrder) // Avoid
>> collecting data that will not be used by future frames.
>>  {
>>  ScopedLock dynLock(m_top->m_dynamicRefineLock);
>>  for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
>> @@ -1470,7 +1471,8 @@
>>  for (uint32_t depth = 0; depth <
>> m_param->maxCUDepth; depth++)
>>  {
>>  int offset = (depth * X265_REFINE_INTER_LEVELS)
>> + i;
>> -int index = (m_frame->m_encodeOrder *
>> X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
>> +int curFrameIndex = m_frame->m_encodeOrder -
>> m_top->m_startPoint;
>> +int index = (curFrameIndex *
>> X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
>>  if (ctu->m_collectCUCount[offset])
>>  {
>>  m_top->m_variance[index] +=
>> ctu->m_collectCUVariance[offset];
>> @@ -1869,8 +1871,14 @@
>>
>>  void FrameEncoder::computeAvgTrainingData()
>>  {
>> -if (m_frame->m_lowres.bScenecut)
>> +if (m_frame->m_lowres.bScenecut || m_frame->m_lowres.bKeyframe)
>> +{
>>  m_top->m_startPoint = m_frame->m_encodeOrder;
>> +int size = (m_param->keyframeMax + m_param->lookaheadDepth) *
>> m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
>> +memset(m_top->m_variance, 0, size * sizeof(uint64_t));
>> +memset(m_top->m_rdCost, 0, size * sizeof(uint64_t));
>> +memset(m_top->m_trainingCount, 0, size * sizeof(uint32_t));
>>
>
> Is it required to reset the buffer every time? I believe you might be
> overwriting those buffers periodically. Correct me if I am wrong.
>

We will be incrementally adding to the buffer so the reset is required when
we want to restart the training.

>
> +}
>>
>>  if (m_frame->m_encodeOrder - m_top->m_startPoint < 2 *
>> m_param->frameNumThreads)
>>  m_frame->m_classifyFrame = false;
>> @@ -1884,8 +1892,8 @@
>>
>>  if (m_frame->m_classifyFrame)
>>  {
>> -uint32_t limit = m_frame->m_encodeOrder -
>> m_param->frameNumThreads - 1;
>> -for (uint32_t i = m_top->m_startPoint + 1; i < limit; i++)
>> +uint32_t limit = m_frame->m_encodeOrder - m_top->m_startPoint -
>> m_param->frameNumThreads;
>> +for (uint32_t i = 1; i < limit; i++)
>>  {
>>  for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++)
>>  {
>>
>> ___
>> x265-devel mailing list
>> x265-devel@videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
> ___
> x265-devel mailing list
> x265-devel@videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH x265 Stable] Deprecate support for refine inter 0 for scaled encodes

2018-03-23 Thread Bhavna Hariharan
This patch is for the stable branch. Could you please try applying it on
changeset 11993 (22c127ff05d5) Fix 32 bit build error ( using CMAKE GUI) in
Linux?


Thanks,

Bhavna Hariharan

On Fri, Mar 23, 2018 at 3:05 PM, Ashok Kumar Mishra <
as...@multicorewareinc.com> wrote:

>
>
> On Fri, Mar 23, 2018 at 2:14 PM, <bha...@multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Bhavna Hariharan <bha...@multicorewareinc.com>
>> # Date 1521693438 -19800
>> #  Thu Mar 22 10:07:18 2018 +0530
>> # Branch stable
>> # Node ID e1280b17edf7b3ff22a8a45fed935896802909b4
>> # Parent  22c127ff05d593cbe420f1cb4a57c39d5a885957
>> Deprecate support for refine inter 0 for scaled encodes.
>>
>> diff -r 22c127ff05d5 -r e1280b17edf7 source/common/cudata.cpp
>> --- a/source/common/cudata.cpp  Thu Mar 01 15:35:06 2018 +0530
>> +++ b/source/common/cudata.cpp  Thu Mar 22 10:07:18 2018 +0530
>> @@ -1626,11 +1626,6 @@
>>  dir |= (1 << list);
>>  candMvField[count][list].mv = colmv;
>>  candMvField[count][list].refIdx = refIdx;
>> -if (m_encData->m_param->scaleFactor &&
>> m_encData->m_param->analysisSave && m_log2CUSize[0] < 4)
>> -{
>> -MV dist(MAX_MV, MAX_MV);
>> -candMvField[count][list].mv = dist;
>> -}
>>  }
>>  }
>>
>> @@ -1790,14 +1785,7 @@
>>
>>  int curRefPOC = m_slice->m_refPOCList[picList][refIdx];
>>  int curPOC = m_slice->m_poc;
>> -
>> -if (m_encData->m_param->scaleFactor &&
>> m_encData->m_param->analysisSave && (m_log2CUSize[0] < 4))
>> -{
>> -MV dist(MAX_MV, MAX_MV);
>> -pmv[numMvc++] = amvpCand[num++] = dist;
>> -}
>> -else
>> -pmv[numMvc++] = amvpCand[num++] =
>> scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC,
>> curRefPOC, colPOC, colRefPOC);
>> +pmv[numMvc++] = amvpCand[num++] =
>> scaleMvByPOCDist(neighbours[MD_COLLOCATED].mv[picList], curPOC,
>> curRefPOC, colPOC, colRefPOC);
>>  }
>>  }
>>
>> diff -r 22c127ff05d5 -r e1280b17edf7 source/encoder/encoder.cpp
>> --- a/source/encoder/encoder.cpp  Thu Mar 01 15:35:06 2018 +0530
>> +++ b/source/encoder/encoder.cpp  Thu Mar 22 10:07:18 2018 +0530
>> @@ -2685,6 +2685,12 @@
>>  }
>>  }
>>
>> +if (p->scaleFactor && p->analysisLoad && !p->interRefine)
>> +{
>> +x265_log(p, X265_LOG_WARNING, "Inter refinement 0 is not
>> supported with scaling. Enabling refine-inter 1.\n");
>> +p->interRefine = 1;
>> +}
>> +
>>  if (p->limitTU && p->interRefine)
>>  {
>>  x265_log(p, X265_LOG_WARNING, "Inter refinement does not support
>> limitTU. Disabling limitTU.\n");
>>
>> ___
>> x265-devel mailing list
>> x265-devel@videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
> Unable to apply on latest tip. Please send it again.
>
> ___
> x265-devel mailing list
> x265-devel@videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] limitTU: Save intra CU's TU depth when analysis save/load is enabled

2018-02-14 Thread Bhavna Hariharan
This is an output-changing commit for --limit-tu 3 and --limit-tu 4.


Thanks,

Bhavna Hariharan

On Wed, Feb 14, 2018 at 3:46 PM, <bha...@multicorewareinc.com> wrote:

> # HG changeset patch
> # User Bhavna Hariharan <bha...@multicorewareinc.com>
> # Date 1518603329 -19800
> #  Wed Feb 14 15:45:29 2018 +0530
> # Node ID 36ab263cc891e0cb5d3dd5ce2c80e35faa0ce996
> # Parent  01b685d6fa33aaab2b7f53aa1e1a2aec0a954025
> limitTU: Save intra CU's TU depth when analysis save/load is enabled
>
> This patch will cause an output mismatch between analysis save and load when
> limit-tu 3/4 is enabled. This change is expected as the load run will have
> only
> the best mode's TU information. For CUs where the neighbour's TU depth is
> unavailable, load encode will evaluate all TU depths.
>
> diff -r 01b685d6fa33 -r 36ab263cc891 doc/reST/cli.rst
> --- a/doc/reST/cli.rst  Sat Feb 10 06:16:45 2018 +0100
> +++ b/doc/reST/cli.rst  Wed Feb 14 15:45:29 2018 +0530
> @@ -1029,7 +1029,13 @@
> Level 4 - uses the depth of the neighbouring/ co-located CUs TU
> depth
> to limit the 1st subTU depth. The 1st subTU depth is taken as the
> limiting depth for the other subTUs.
> -
> +
> +   Enabling levels 3 or 4 may cause a mismatch in the output
> bitstreams
> +   between option:`--analysis-save` and option:`--analysis-load`
> +   as all neighbouring CUs TU depth may not be available in the
> +   option:`--analysis-load` run as only the best mode's information is
> +   available to it.
> +
> Default: 0
>
>  .. option:: --nr-intra , --nr-inter 
> diff -r 01b685d6fa33 -r 36ab263cc891 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp   Sat Feb 10 06:16:45 2018 +0100
> +++ b/source/encoder/analysis.cpp   Wed Feb 14 15:45:29 2018 +0530
> @@ -647,13 +647,12 @@
>  cacheCost[cuIdx] = md.bestMode->rdCost;
>  }
>
> -/* Save Intra CUs TU depth only when analysis mode is OFF */
> -if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4 &&
> (!m_param->analysisSave && !m_param->analysisLoad))
> +if ((m_limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
>  {
>  CUData* ctu = md.bestMode->cu.m_encData->
> getPicCTU(parentCTU.m_cuAddr);
>  int8_t maxTUDepth = -1;
>  for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
> -maxTUDepth = X265_MAX(maxTUDepth, md.pred[PRED_INTRA].cu.m_
> tuDepth[i]);
> +maxTUDepth = X265_MAX(maxTUDepth,
> md.bestMode->cu.m_tuDepth[i]);
>  ctu->m_refTuDepth[cuGeom.geomRecurId] = maxTUDepth;
>  }
>
> diff -r 01b685d6fa33 -r 36ab263cc891 source/test/regression-tests.txt
> --- a/source/test/regression-tests.txt  Sat Feb 10 06:16:45 2018 +0100
> +++ b/source/test/regression-tests.txt  Wed Feb 14 15:45:29 2018 +0530
> @@ -23,7 +23,7 @@
>  BasketballDrive_1920x1080_50.y4m,--preset slower --lossless --chromaloc
> 3 --subme 0 --limit-tu 4
>  BasketballDrive_1920x1080_50.y4m,--preset slower --no-cutree
> --analysis-save x265_analysis.dat --analysis-reuse-level 10 --bitrate 7000
> --limit-tu 0::--preset slower --no-cutree --analysis-load x265_analysis.dat
> --analysis-reuse-level 10 --bitrate 7000 --limit-tu 0
>  BasketballDrive_1920x1080_50.y4m,--preset veryslow --crf 4 --cu-lossless
> --pmode --limit-refs 1 --aq-mode 3 --limit-tu 3
> -BasketballDrive_1920x1080_50.y4m,--preset veryslow --no-cutree
> --analysis-save x265_analysis.dat --bitrate 7000 --tskip-fast --limit-tu
> 4::--preset veryslow --no-cutree --analysis-load x265_analysis.dat
> --bitrate 7000  --tskip-fast --limit-tu 4
> +BasketballDrive_1920x1080_50.y4m,--preset veryslow --no-cutree
> --analysis-save x265_analysis.dat --bitrate 7000 --tskip-fast --limit-tu
> 2::--preset veryslow --no-cutree --analysis-load x265_analysis.dat
> --bitrate 7000  --tskip-fast --limit-tu 2
>  BasketballDrive_1920x1080_50.y4m,--preset veryslow --recon-y4m-exec
> "ffplay -i pipe:0 -autoexit"
>  Coastguard-4k.y4m,--preset ultrafast --recon-y4m-exec "ffplay -i pipe:0
> -autoexit"
>  Coastguard-4k.y4m,--preset superfast --tune grain --overscan=crop
>
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] fix multiple insertion of payloadSize into bitstream

2017-09-26 Thread Bhavna Hariharan
Please ignore this, I shall resend an updated patch.


Regards,

Bhavna Hariharan

On Mon, Sep 25, 2017 at 6:42 PM, <bha...@multicorewareinc.com> wrote:

> # HG changeset patch
> # User Bhavna Hariharan <bha...@multicorewareinc.com>
> # Date 1506344707 -19800
> #  Mon Sep 25 18:35:07 2017 +0530
> # Node ID 3296d54f8b38c3b9fe03f8a211c4ee3b624c0216
> # Parent  f8ae7afc1f61ed0db3b2f23f5d581706fe6ed677
> fix multiple insertion of payloadSize into bitstream
>
> bitbucket issue #369
>
> diff -r f8ae7afc1f61 -r 3296d54f8b38 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp  Mon Sep 11 11:12:19 2017 +0530
> +++ b/source/encoder/encoder.cpp  Mon Sep 25 18:35:07 2017 +0530
> @@ -631,12 +631,12 @@
>  int32_t i = 0;
>  toneMap.payloadSize = 0;
>  while (m_cim[currentPOC][i] == 0xFF)
> -toneMap.payloadSize += m_cim[currentPOC][i++] + 1;
> -toneMap.payloadSize += m_cim[currentPOC][i] + 1;
> +toneMap.payloadSize += m_cim[currentPOC][i++];
> +toneMap.payloadSize += m_cim[currentPOC][i];
>
>  toneMap.payload = (uint8_t*)x265_malloc(sizeof(uint8_t)
> * toneMap.payloadSize);
>  toneMap.payloadType = USER_DATA_REGISTERED_ITU_T_T35;
> -memcpy(toneMap.payload, m_cim[currentPOC],
> toneMap.payloadSize);
> +memcpy(toneMap.payload, _cim[currentPOC][1],
> toneMap.payloadSize);
>  }
>  }
>  #endif
>
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] Fix testbench crash with dhdr10 enabled

2017-08-04 Thread Bhavna Hariharan
Please ignore this patch.


Regards,

Bhavna Hariharan

On Wed, Aug 2, 2017 at 9:47 PM, <bha...@multicorewareinc.com> wrote:

> # HG changeset patch
> # User Bhavna Hariharan <bha...@multicorewareinc.com>
> # Date 1501680985 -19800
> #  Wed Aug 02 19:06:25 2017 +0530
> # Node ID 041fcaf820659d504d887cfaa239a02988812c14
> # Parent  d11482e5fedbcdaf62ee3c6872f43827d99ad181
> Fix testbench crash with dhdr10 enabled
>
> __rdtsc() was being defined multiple times causing failure in testbench.
> This patch renames the function and duplicates the REPORT_SPEEDUP macro
> that
> supports GNU compiler.
>
> Display warning when dhdr10 is enabled with MSVC version that does not
> support c++11.
>
> diff -r d11482e5fedb -r 041fcaf82065 source/CMakeLists.txt
> --- a/source/CMakeLists.txt Mon Jul 24 11:15:38 2017 +0530
> +++ b/source/CMakeLists.txt Wed Aug 02 19:06:25 2017 +0530
> @@ -184,6 +184,9 @@
>  endif()
>  # this option is to enable the inclusion of dynamic HDR10 library to the
> libx265 compilation
>  option(ENABLE_HDR10_PLUS "Enable dynamic HDR10 compilation" OFF)
> +if(MSVC AND (MSVC_VERSION LESS 1800) AND ENABLE_HDR10_PLUS)
> +message(FATAL_ERROR "MSVC version 12.0 or above required to support
> hdr10plus")
> +endif()
>  if(GCC)
>  add_definitions(-Wall -Wextra -Wshadow)
>  add_definitions(-D__STDC_LIMIT_MACROS=1)
> diff -r d11482e5fedb -r 041fcaf82065 source/dynamicHDR10/
> metadataFromJson.cpp
> --- a/source/dynamicHDR10/metadataFromJson.cpp  Mon Jul 24 11:15:38 2017
> +0530
> +++ b/source/dynamicHDR10/metadataFromJson.cpp  Wed Aug 02 19:06:25 2017
> +0530
> @@ -34,8 +34,6 @@
>  #include "BasicStructures.h"
>  #include "SeiMetadataDictionary.h"
>
> -#define M_PI 3.14159265358979323846
> -
>  using namespace SeiMetadataDictionary;
>
>  class metadataFromJson::DynamicMetaIO
> diff -r d11482e5fedb -r 041fcaf82065 source/test/testharness.h
> --- a/source/test/testharness.h Mon Jul 24 11:15:38 2017 +0530
> +++ b/source/test/testharness.h Wed Aug 02 19:06:25 2017 +0530
> @@ -70,7 +70,7 @@
>  #include 
>  #elif defined(__GNUC__)
>  /* fallback for older GCC/MinGW */
> -static inline uint32_t __rdtsc(void)
> +static inline uint32_t rdtsc(void)
>  {
>  uint32_t a = 0;
>
> @@ -92,6 +92,41 @@
>  // Adapted from checkasm.c, runs each optimized primitive four times,
> measures rdtsc
>  // and discards invalid times.  Repeats 1000 times to get a good
> average.  Then measures
>  // the C reference with fewer runs and reports X factor and average
> cycles.
> +
> +// Duplicate macro to avoid conflict with in-built function __rdtsc()
> +#if defined(__GNUC__)
> +#define REPORT_SPEEDUP(RUNOPT, RUNREF, ...) \
> +{ \
> +uint32_t cycles = 0; int runs = 0; \
> +RUNOPT(__VA_ARGS__); \
> +for (int ti = 0; ti < BENCH_RUNS; ti++) { \
> +uint32_t t0 = (uint32_t)rdtsc(); \
> +RUNOPT(__VA_ARGS__); \
> +RUNOPT(__VA_ARGS__); \
> +RUNOPT(__VA_ARGS__); \
> +RUNOPT(__VA_ARGS__); \
> +uint32_t t1 = (uint32_t)rdtsc() - t0; \
> +if (t1 * runs <= cycles * 4 && ti > 0) { cycles += t1;
> runs++; } \
> +} \
> +uint32_t refcycles = 0; int refruns = 0; \
> +RUNREF(__VA_ARGS__); \
> +for (int ti = 0; ti < BENCH_RUNS / 4; ti++) { \
> +uint32_t t0 = (uint32_t)rdtsc(); \
> +RUNREF(__VA_ARGS__); \
> +RUNREF(__VA_ARGS__); \
> +RUNREF(__VA_ARGS__); \
> +RUNREF(__VA_ARGS__); \
> +uint32_t t1 = (uint32_t)rdtsc() - t0; \
> +if (t1 * refruns <= refcycles * 4 && ti > 0) { refcycles +=
> t1; refruns++; } \
> +} \
> +x265_emms(); \
> +float optperf = (10.0f * cycles / runs) / 4; \
> +float refperf = (10.0f * refcycles / refruns) / 4; \
> +printf("\t%3.2fx ", refperf / optperf); \
> +printf("\t %-8.2lf \t %-8.2lf\n", optperf, refperf); \
> +}
> +
> +#else
>  #define REPORT_SPEEDUP(RUNOPT, RUNREF, ...) \
>  { \
>  uint32_t cycles = 0; int runs = 0; \
> @@ -123,6 +158,7 @@
>  printf("\t %-8.2lf \t %-8.2lf\n", optperf, refperf); \
>  }
>
> +#endif
>  extern "C" {
>  #if X265_ARCH_X86
>  int PFX(stack_pagealign)(int (*func)(), int align);
>
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] dynamicHDR10 branch probably not cleaned?

2017-06-06 Thread Bhavna Hariharan
On Mon, May 29, 2017 at 3:43 PM, Mario *LigH* Rohkrämer <cont...@ligh.de>
wrote:

> I believe that the "dynamicHDR10" sub-target is not yet included in the
> "clean-generated" rule, causing previously compiled files not to be deleted
> in this directory branch when a cleanup was desired.
>
> +
> Scanning dependencies of target dynamicHDR10
> [  1%] Building CXX object dynamicHDR10/CMakeFiles/dynami
> cHDR10.dir/metadataFromJson.cpp.obj
> +
> # missing many more files here which are not built because their
> compilation result probably still exists, despite a previous "make
> clean-generated"
>

What are the files that aren't getting cleaned? I tried reproducing the
issue, all the dhdr10 files are being deleted after "make clean" and they
are re-generated after "make".


> +
> [  6%] Built target dynamicHDR10
> Scanning dependencies of target encoder
> +
>
> --
>
> Fun and success!
> Mario *LigH* Rohkrämer
> mailto:cont...@ligh.de
>
> _______
> x265-devel mailing list
> x265-devel@videolan.org
> https://mailman.videolan.org/listinfo/x265-devel




Regards,

Bhavna Hariharan
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 3 of 4] limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth

2016-11-17 Thread Bhavna Hariharan
There is a mistake in the first patch, please apply the latest patch.


Regards,

Bhavna Hariharan

On Fri, Nov 18, 2016 at 9:49 AM, Bhavna Hariharan <
bha...@multicorewareinc.com> wrote:

> # HG changeset patch
> # User Bhavna Hariharan <bha...@multicorewareinc.com>
> # Date 1479380469 -19800
> #  Thu Nov 17 16:31:09 2016 +0530
> # Node ID d4425aa677c63b8486dde5bb716f9a191da6bdaa
> # Parent  07a4e4d785a69f719922129ca5997b12552bb4ab
> limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth
>
> diff -r 07a4e4d785a6 -r d4425aa677c6 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp Thu Nov 17 12:19:38 2016 +0530
> +++ b/source/encoder/analysis.cpp Thu Nov 17 16:31:09 2016 +0530
> @@ -377,7 +377,7 @@
>  checkBestMode(md.pred[PRED_INTRA_NxN], depth);
>  }
>
> -if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
> +if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
>  {
>  CUData* ctu = md.bestMode->cu.m_encData->
> getPicCTU(parentCTU.m_cuAddr);
>  int8_t maxTUDepth = -1;
> @@ -954,7 +954,7 @@
>  bool skipRectAmp = false;
>  bool chooseMerge = false;
>
> -if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
> +if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
>  m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);
>
>  SplitData splitData[4];
> @@ -1463,7 +1463,7 @@
>  if (m_param->rdLevel)
>  md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr,
> cuGeom.absPartIdx);
>
> -if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
> +if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
>  {
>  if (mightNotSplit)
>  {
> @@ -1498,7 +1498,7 @@
>  md.pred[PRED_2Nx2N].rdCost = 0;
>  }
>
> -if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
> +if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
>  m_maxTUDepth =  loadTUDepth(cuGeom, parentCTU);
>
>  SplitData splitData[4];
> @@ -1827,7 +1827,7 @@
>  if (mightSplit)
>  addSplitFlagCost(*md.bestMode, cuGeom.depth);
>  }
> -if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
> +if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
>  {
>  if (mightNotSplit)
>  {
> diff -r 07a4e4d785a6 -r d4425aa677c6 source/encoder/search.cpp
> --- a/source/encoder/search.cpp Thu Nov 17 12:19:38 2016 +0530
> +++ b/source/encoder/search.cpp Thu Nov 17 16:31:09 2016 +0530
> @@ -2625,7 +2625,7 @@
>
>  uint32_t tuDepthRange[2];
>  cu.getInterTUQtDepthRange(tuDepthRange, 0);
> -if (limitTU == X265_TU_LIMIT_NEIGH)
> +if (limitTU & X265_TU_LIMIT_NEIGH)
>  {
>  int maxLog2CUSize = (int)g_log2Size[m_param->maxCUSize];
>  m_maxTUDepth = x265_clip3(maxLog2CUSize -
> (int32_t)tuDepthRange[1], maxLog2CUSize - (int32_t)tuDepthRange[0],
> m_maxTUDepth);
> @@ -2639,7 +2639,14 @@
>  memset(_cacheTU, 0, sizeof(TUInfoCache));
>
>  Cost costs;
> -estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
> tuDepthRange);
> +if (limitTU == X265_TU_LIMIT_DFS_NEIGH)
> +{
> +int32_t tempDepth = m_maxTUDepth;
> +estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
> tuDepthRange);
> +m_maxTUDepth = tempDepth;
> +}
> +else
> +estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
> tuDepthRange);
>
>  uint32_t tqBypass = cu.m_tqBypass[0];
>  if (!tqBypass)
> @@ -2898,10 +2905,11 @@
>  uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
>  for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx,
> qPartIdx += qNumParts)
>  {
> -if (limitTU == X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
> +if (limitTU & X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
>  {
> +m_maxTUDepth = cu.m_tuDepth[0];
>  // Fetch maximum TU depth of first sub partition to limit
> recursion of others
> -for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++)
> +for (uint32_t i = 1; i < cuGeom.numPartitions / 4; i++)
>  m_maxTUDepth = X265_MAX(m_maxTUDepth, cu.m_tuDepth[i]);
>  }
>      estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv,
> splitCost, depthRange, splitMore);
> @@ -2968,7 +2976,7 @@
>  }
>  }
>

Re: [x265] [PATCH 3 of 4] limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth

2016-11-17 Thread Bhavna Hariharan
# HG changeset patch
# User Bhavna Hariharan <bha...@multicorewareinc.com>
# Date 1479380469 -19800
#  Thu Nov 17 16:31:09 2016 +0530
# Node ID d4425aa677c63b8486dde5bb716f9a191da6bdaa
# Parent  07a4e4d785a69f719922129ca5997b12552bb4ab
limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth

diff -r 07a4e4d785a6 -r d4425aa677c6 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Thu Nov 17 12:19:38 2016 +0530
+++ b/source/encoder/analysis.cpp Thu Nov 17 16:31:09 2016 +0530
@@ -377,7 +377,7 @@
 checkBestMode(md.pred[PRED_INTRA_NxN], depth);
 }

-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 {
 CUData* ctu =
md.bestMode->cu.m_encData->getPicCTU(parentCTU.m_cuAddr);
 int8_t maxTUDepth = -1;
@@ -954,7 +954,7 @@
 bool skipRectAmp = false;
 bool chooseMerge = false;

-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 m_maxTUDepth = loadTUDepth(cuGeom, parentCTU);

 SplitData splitData[4];
@@ -1463,7 +1463,7 @@
 if (m_param->rdLevel)
 md.bestMode->reconYuv.copyToPicYuv(reconPic, cuAddr,
cuGeom.absPartIdx);

-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 {
 if (mightNotSplit)
 {
@@ -1498,7 +1498,7 @@
 md.pred[PRED_2Nx2N].rdCost = 0;
 }

-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 m_maxTUDepth =  loadTUDepth(cuGeom, parentCTU);

 SplitData splitData[4];
@@ -1827,7 +1827,7 @@
 if (mightSplit)
 addSplitFlagCost(*md.bestMode, cuGeom.depth);
 }
-if (limitTU == X265_TU_LIMIT_NEIGH && cuGeom.log2CUSize >= 4)
+if ((limitTU & X265_TU_LIMIT_NEIGH) && cuGeom.log2CUSize >= 4)
 {
 if (mightNotSplit)
 {
diff -r 07a4e4d785a6 -r d4425aa677c6 source/encoder/search.cpp
--- a/source/encoder/search.cpp Thu Nov 17 12:19:38 2016 +0530
+++ b/source/encoder/search.cpp Thu Nov 17 16:31:09 2016 +0530
@@ -2625,7 +2625,7 @@

 uint32_t tuDepthRange[2];
 cu.getInterTUQtDepthRange(tuDepthRange, 0);
-if (limitTU == X265_TU_LIMIT_NEIGH)
+if (limitTU & X265_TU_LIMIT_NEIGH)
 {
 int maxLog2CUSize = (int)g_log2Size[m_param->maxCUSize];
 m_maxTUDepth = x265_clip3(maxLog2CUSize -
(int32_t)tuDepthRange[1], maxLog2CUSize - (int32_t)tuDepthRange[0],
m_maxTUDepth);
@@ -2639,7 +2639,14 @@
 memset(_cacheTU, 0, sizeof(TUInfoCache));

 Cost costs;
-estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
tuDepthRange);
+if (limitTU == X265_TU_LIMIT_DFS_NEIGH)
+{
+int32_t tempDepth = m_maxTUDepth;
+estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
tuDepthRange);
+m_maxTUDepth = tempDepth;
+}
+else
+estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs,
tuDepthRange);

 uint32_t tqBypass = cu.m_tqBypass[0];
 if (!tqBypass)
@@ -2898,10 +2905,11 @@
 uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
 for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx,
qPartIdx += qNumParts)
 {
-if (limitTU == X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
+if (limitTU & X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
 {
+m_maxTUDepth = cu.m_tuDepth[0];
 // Fetch maximum TU depth of first sub partition to limit
recursion of others
-for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++)
+for (uint32_t i = 1; i < cuGeom.numPartitions / 4; i++)
 m_maxTUDepth = X265_MAX(m_maxTUDepth, cu.m_tuDepth[i]);
 }
 estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv,
splitCost, depthRange, splitMore);
@@ -2968,7 +2976,7 @@
 }
 }
 }
-else if (limitTU == X265_TU_LIMIT_DFS || limitTU ==
X265_TU_LIMIT_NEIGH)
+else if (limitTU & X265_TU_LIMIT_DFS_NEIGH)
     {
 if (bCheckSplit && m_maxTUDepth >= 0)
 {



Regards,

Bhavna Hariharan

On Thu, Nov 17, 2016 at 5:06 PM, <bha...@multicorewareinc.com> wrote:

> # HG changeset patch
> # User Bhavna Hariharan <bha...@multicorewareinc.com>
> # Date 1479380469 -19800
> #  Thu Nov 17 16:31:09 2016 +0530
> # Node ID f5d3a5aedcdf20235ec76dbf9aba516da83e8dd1
> # Parent  07a4e4d785a69f719922129ca5997b12552bb4ab
> limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth
>
> diff -r 07a4e4d785a6 -r f5d3a5aedcdf s

Re: [x265] [PATCH] limitTU: fix energy calculation used in limiting TU recursion

2016-10-18 Thread Bhavna Hariharan
On Mon, Oct 17, 2016 at 8:30 PM, Ashok Kumar Mishra <
as...@multicorewareinc.com> wrote:

>
>
> On Mon, Oct 17, 2016 at 3:14 PM, Bhavna Hariharan <
> bha...@multicorewareinc.com> wrote:
>
>>
>> On Mon, Oct 17, 2016 at 2:57 PM, Pradeep Ramachandran <
>> prad...@multicorewareinc.com> wrote:
>>
>>>
>>> On Fri, Oct 14, 2016 at 7:20 PM, <bha...@multicorewareinc.com> wrote:
>>>
>>>> # HG changeset patch
>>>> # User Bhavna Hariharan <bha...@multicorewareinc.com>
>>>> # Date 1476275329 -19800
>>>> #  Wed Oct 12 17:58:49 2016 +0530
>>>> # Node ID 854149baceefa075c3b1af12433680ffda2e3b64
>>>> # Parent  c97805dad9148ad3cddba10a67ed5596508e8f86
>>>> limitTU: fix energy calculation used in limiting TU recursion
>>>>
>>>> This commit changes the output of limit TU
>>>>
>>>> diff -r c97805dad914 -r 854149baceef source/encoder/search.cpp
>>>> --- a/source/encoder/search.cpp Thu Oct 13 17:53:48 2016 +0800
>>>> +++ b/source/encoder/search.cpp Wed Oct 12 17:58:49 2016 +0530
>>>> @@ -3420,14 +3420,15 @@
>>>>  if (m_param->limitTU && bCheckSplit)
>>>>  {
>>>>  // Stop recursion if the TU's energy level is minimal
>>>> +uint32_t numCoeff = trSize * trSize;
>>>>  if (cbfFlag[TEXT_LUMA][0] == 0)
>>>>  bCheckSplit = false;
>>>> -else if (numSig[TEXT_LUMA][0] < (cuGeom.numPartitions /
>>>> 16))
>>>> +else if (numSig[TEXT_LUMA][0] < (numCoeff / 64))
>>>>  {
>>>>  uint32_t energy = 0;
>>>> -for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
>>>> +for (uint32_t i = 0; i < numCoeff; i++)
>>>>  energy += abs(coeffCurY[i]);
>>>> -if (energy < numSig[TEXT_LUMA][0])
>>>> +if (energy == numSig[TEXT_LUMA][0])
>>>>  bCheckSplit = false;
>>>>
>>>
>>> Can you give an example where bCheckSplit is disabled here? I am finding
>>> it hard to work out the conditions under which this check is satisfied.
>>>
>>
>> Energy will be equal to the number of significant coefficients when each
>> of the non-zero coefficients is one.
>>
>>
>
> I feel this condition may rarely, if ever, be satisfied. You are calculating
> energy as the sum of the absolute values of the transform coefficients and
> comparing it with a threshold (the number of coefficients).
> There is very little chance that both will be the same. The comparison should
> be either less-than-or-equal-to or greater-than-or-equal-to. This may be a
> test to replicate a zero-coefficient TU block. We
> should check the subjective quality, particularly for limit TU, considering
> that larger TUs produce more ringing artifacts.
>


The energy can never be less than the number of significant coefficients,
because every significant coefficient has an absolute value of at least one.
If the energy is greater than the number of significant coefficients, we don't
want to limit the TU depth. We noticed a performance improvement of up to 9%
for certain videos. We checked the visual quality for the videos and have not
observed any ringing artifacts so far.

crowd run:

with energy check - encoded 500 frames in 545.41s (0.93 fps), 8878.75 kb/s,
Avg QP:38.00, SSIM Mean Y: 0.8659922 ( 8.729 dB)
without energy check - encoded 500 frames in 568.05s (0.85 fps), 8879.27
kb/s, Avg QP:38.00, SSIM Mean Y: 0.8660624 ( 8.731 dB)
Improvement in performance - 8.6%
Hit percentage of energy check - 61%

kimono:

with energy check - encoded 240 frames in 286.38s (0.84 fps), 4361.14 kb/s,
Avg QP:26.83, SSIM Mean Y: 0.9579188 (13.759 dB)
without energy check - encoded 240 frames in 312.36s (0.77 fps), 4361.61
kb/s, Avg QP:26.83, SSIM Mean Y: 0.9579364 (13.761 dB)
Improvement in performance - 8.3%
Hit percentage of energy check - 70%




>
>>>
>>>>  }
>>>>  }
>>>> ___
>>>> x265-devel mailing list
>>>> x265-devel@videolan.org
>>>> https://mailman.videolan.org/listinfo/x265-devel
>>>>
>>>
>>>
>>> ___
>>> x265-devel mailing list
>>> x265-devel@videolan.org
>>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>>
>>
>> ___
>> x265-devel mailing list
>> x265-devel@videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>>
>
> ___
> x265-devel mailing list
> x265-devel@videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH] limitTU: fix energy calculation used in limiting TU recursion

2016-10-17 Thread Bhavna Hariharan
On Mon, Oct 17, 2016 at 2:57 PM, Pradeep Ramachandran <
prad...@multicorewareinc.com> wrote:

>
> On Fri, Oct 14, 2016 at 7:20 PM, <bha...@multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Bhavna Hariharan <bha...@multicorewareinc.com>
>> # Date 1476275329 -19800
>> #  Wed Oct 12 17:58:49 2016 +0530
>> # Node ID 854149baceefa075c3b1af12433680ffda2e3b64
>> # Parent  c97805dad9148ad3cddba10a67ed5596508e8f86
>> limitTU: fix energy calculation used in limiting TU recursion
>>
>> This commit changes the output of limit TU
>>
>> diff -r c97805dad914 -r 854149baceef source/encoder/search.cpp
>> --- a/source/encoder/search.cpp Thu Oct 13 17:53:48 2016 +0800
>> +++ b/source/encoder/search.cpp Wed Oct 12 17:58:49 2016 +0530
>> @@ -3420,14 +3420,15 @@
>>  if (m_param->limitTU && bCheckSplit)
>>  {
>>  // Stop recursion if the TU's energy level is minimal
>> +uint32_t numCoeff = trSize * trSize;
>>  if (cbfFlag[TEXT_LUMA][0] == 0)
>>  bCheckSplit = false;
>> -else if (numSig[TEXT_LUMA][0] < (cuGeom.numPartitions / 16))
>> +else if (numSig[TEXT_LUMA][0] < (numCoeff / 64))
>>  {
>>  uint32_t energy = 0;
>> -for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
>> +for (uint32_t i = 0; i < numCoeff; i++)
>>  energy += abs(coeffCurY[i]);
>> -if (energy < numSig[TEXT_LUMA][0])
>> +if (energy == numSig[TEXT_LUMA][0])
>>  bCheckSplit = false;
>>
>
> Can you give an example where bCheckSplit is disabled here? I am finding it
> hard to work out the conditions under which this check is satisfied.
>

Energy will be equal to the number of significant coefficients when each of
the non-zero coefficients is one.


>
>
>>  }
>>  }
>> ___
>> x265-devel mailing list
>> x265-devel@videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
> ___
> x265-devel mailing list
> x265-devel@videolan.org
> https://mailman.videolan.org/listinfo/x265-devel





Regards,

Bhavna Hariharan
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


Re: [x265] [PATCH 2 of 4] limit TU : use cbf and quantization coefficients to limit recursion

2016-10-08 Thread Bhavna Hariharan
Hi Deepthi,

On Fri, Oct 7, 2016 at 1:17 PM, Deepthi Nandakumar <
deepthipnandaku...@gmail.com> wrote:

> Kavitha/Bhavana, excellent job! The test metrics look pretty good.
>
>
>
> On Tue, Oct 4, 2016 at 2:50 PM, <kavi...@multicorewareinc.com> wrote:
>
>> # HG changeset patch
>> # User Bhavna Hariharan <bha...@multicorewareinc.com>
>> # Date 1474620761 -19800
>> #  Fri Sep 23 14:22:41 2016 +0530
>> # Node ID c018bc0ffc156902b1a9a13ecd6996d30d7403df
>> # Parent  c10ef341f4e65883243f78040f52ed06ace99535
>> limit TU : use cbf and quantization coefficients to limit recursion
>>
>> diff -r c10ef341f4e6 -r c018bc0ffc15 source/encoder/search.cpp
>> --- a/source/encoder/search.cpp Tue Oct 04 13:27:48 2016 +0530
>> +++ b/source/encoder/search.cpp Fri Sep 23 14:22:41 2016 +0530
>> @@ -3194,6 +3194,8 @@
>>  singlePsyEnergy[TEXT_LUMA][0] = nonZeroPsyEnergyY;
>>  cbfFlag[TEXT_LUMA][0] = !!numSigTSkipY;
>>  bestTransformMode[TEXT_LUMA][0] = 1;
>> +if (m_param->limitTU)
>> +numSig[TEXT_LUMA][0] = numSigTSkipY;
>>  uint32_t numCoeffY = 1 << (log2TrSize << 1);
>>  memcpy(coeffCurY, m_tsCoeff, sizeof(coeff_t) *
>> numCoeffY);
>>  primitives.cu[partSize].copy_ss(curResiY, strideResiY,
>> m_tsResidual, trSize);
>> @@ -3331,6 +3333,21 @@
>>  fullCost.rdcost = m_rdCost.calcPsyRdCost(fullCost.distortion,
>> fullCost.bits, fullCost.energy);
>>  else
>>  fullCost.rdcost = m_rdCost.calcRdCost(fullCost.distortion,
>> fullCost.bits);
>> +
>> +if (m_param->limitTU && bCheckSplit)
>> +{
>> +// Stop recursion if the TU's energy level is minimal
>> +if (cbfFlag[TEXT_LUMA][0] == 0)
>> +bCheckSplit = false;
>>
>
> Agreed.
>
> +else if (numSig[TEXT_LUMA][0] < (cuGeom.numPartitions / 16))
>> +{
>> +uint32_t energy = 0;
>> +for (uint32_t i = 0; i < cuGeom.numPartitions; i++)
>> +energy += abs(coeffCurY[i]);
>> +if (energy < numSig[TEXT_LUMA][0])
>>
>
> One question, why are we comparing actual coefficient values to number of
> significant coefficients?
>

We want to stop recursion when the energy of TU is low. If the value of
each of the coefficients is minimal (close to 1), the energy will be less
than the number of coefficients.


>
>> +bCheckSplit = false;
>> +}
>> +}
>>  }
>>
>>  // code sub-blocks
>> ___
>> x265-devel mailing list
>> x265-devel@videolan.org
>> https://mailman.videolan.org/listinfo/x265-devel
>>
>
>
>
> --
> Deepthi
>
> ___
> x265-devel mailing list
> x265-devel@videolan.org
> https://mailman.videolan.org/listinfo/x265-devel



Regards,

Bhavna Hariharan
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel