[x265] [PATCH 3 of 4] limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth

2016-11-18 Thread bhavna
# HG changeset patch
# User Bhavna Hariharan 
# Date 1479454023 -19800
#  Fri Nov 18 12:57:03 2016 +0530
# Node ID df9a0c94631c9abac75e5a7cf4bd48ff55adced8
# Parent  40a0a322b26fc0516a72d4de9a941e18b5bb97b9
limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth

diff -r 40a0a322b26f -r df9a0c94631c doc/reST/cli.rst
--- a/doc/reST/cli.rst  Fri Nov 18 12:06:08 2016 +0530
+++ b/doc/reST/cli.rst  Fri Nov 18 12:57:03 2016 +0530
@@ -869,7 +869,7 @@
partitions, in which case a TU split is implied and thus the
residual quad-tree begins one layer below the CU quad-tree.
 
-.. option:: --limit-tu <0..3>
+.. option:: --limit-tu <0..4>
 
Enables early exit from TU depth recursion, for inter coded blocks.
Level 1 - decides to recurse to next higher depth based on cost 
@@ -878,6 +878,9 @@
other split subTUs.
Level 3 - based on the average depth of the co-located and the neighbor
CUs' TU depth, limits recursion of the current CU.
+   Level 4 - uses the TU depth of the neighbouring/co-located CUs to
+   limit the 1st subTU depth. The 1st subTU depth is then taken as the
+   limiting depth for the other subTUs.
 
Default: 0
 
diff -r 40a0a322b26f -r df9a0c94631c source/common/param.cpp
--- a/source/common/param.cpp   Fri Nov 18 12:06:08 2016 +0530
+++ b/source/common/param.cpp   Fri Nov 18 12:57:03 2016 +0530
@@ -1126,7 +1126,7 @@
   "QuadtreeTUMaxDepthInter must be less than or equal to the 
difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1");
 CHECK((param->maxTUSize != 32 && param->maxTUSize != 16 && 
param->maxTUSize != 8 && param->maxTUSize != 4),
   "max TU size must be 4, 8, 16, or 32");
-CHECK(param->limitTU > 3, "Invalid limit-tu option, limit-TU must be 
between 0 and 3");
+CHECK(param->limitTU > 4, "Invalid limit-tu option, limit-TU must be 
between 0 and 4");
 CHECK(param->maxNumMergeCand < 1, "MaxNumMergeCand must be 1 or greater.");
 CHECK(param->maxNumMergeCand > 5, "MaxNumMergeCand must be 5 or smaller.");
 
diff -r 40a0a322b26f -r df9a0c94631c source/encoder/search.cpp
--- a/source/encoder/search.cpp Fri Nov 18 12:06:08 2016 +0530
+++ b/source/encoder/search.cpp Fri Nov 18 12:57:03 2016 +0530
@@ -103,6 +103,8 @@
 m_limitTU = X265_TU_LIMIT_DFS;
 else if (m_param->limitTU == 3)
 m_limitTU = X265_TU_LIMIT_NEIGH;
+else if (m_param->limitTU == 4)
+m_limitTU = X265_TU_LIMIT_DFS + X265_TU_LIMIT_NEIGH;
 }
 
 /* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 
2=16x16, 3=32x32
@@ -2638,13 +2640,20 @@
 
 m_entropyCoder.load(m_rqt[depth].cur);
 
-if (m_limitTU & X265_TU_LIMIT_DFS)
+if ((m_limitTU & X265_TU_LIMIT_DFS) && !(m_limitTU & X265_TU_LIMIT_NEIGH))
 m_maxTUDepth = -1;
 else if (m_limitTU & X265_TU_LIMIT_BFS)
 memset(_cacheTU, 0, sizeof(TUInfoCache));
 
 Cost costs;
-estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
+if ((m_limitTU & X265_TU_LIMIT_DFS) && (m_limitTU & X265_TU_LIMIT_NEIGH))
+{
+int32_t tempDepth = m_maxTUDepth;
+estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, 
tuDepthRange);
+m_maxTUDepth = tempDepth;
+}
+else
+estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, 
tuDepthRange);
 
 uint32_t tqBypass = cu.m_tqBypass[0];
 if (!tqBypass)
@@ -2905,8 +2914,9 @@
 {
 if ((m_limitTU & X265_TU_LIMIT_DFS) && tuDepth == 0 && qIdx == 1)
 {
+m_maxTUDepth = cu.m_tuDepth[0];
 // Fetch maximum TU depth of first sub partition to limit 
recursion of others
-for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++)
+for (uint32_t i = 1; i < cuGeom.numPartitions / 4; i++)
 m_maxTUDepth = X265_MAX(m_maxTUDepth, cu.m_tuDepth[i]);
 }
 estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv, 
splitCost, depthRange, splitMore);
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 4 of 4] tests: update command lines to cover limitTU 3 and 4

2016-11-18 Thread bhavna
# HG changeset patch
# User Bhavna Hariharan 
# Date 1479302428 -19800
#  Wed Nov 16 18:50:28 2016 +0530
# Node ID b476a6420b1ccfebe53bee4ed715b556f443e7f6
# Parent  df9a0c94631c9abac75e5a7cf4bd48ff55adced8
tests:  update command lines to cover limitTU 3 and 4

diff -r df9a0c94631c -r b476a6420b1c source/test/regression-tests.txt
--- a/source/test/regression-tests.txt  Fri Nov 18 12:57:03 2016 +0530
+++ b/source/test/regression-tests.txt  Wed Nov 16 18:50:28 2016 +0530
@@ -19,10 +19,10 @@
 BasketballDrive_1920x1080_50.y4m,--preset medium --keyint -1 --nr-inter 100 
-F4 --no-sao
 BasketballDrive_1920x1080_50.y4m,--preset medium --no-cutree 
--analysis-mode=save --bitrate 7000 --limit-modes,--preset medium --no-cutree 
--analysis-mode=load --bitrate 7000 --limit-modes
 BasketballDrive_1920x1080_50.y4m,--preset slow --nr-intra 100 -F4 
--aq-strength 3 --qg-size 16 --limit-refs 1
-BasketballDrive_1920x1080_50.y4m,--preset slower --lossless --chromaloc 3 
--subme 0
+BasketballDrive_1920x1080_50.y4m,--preset slower --lossless --chromaloc 3 
--subme 0 --limit-tu 4
 BasketballDrive_1920x1080_50.y4m,--preset slower --no-cutree 
--analysis-mode=save --bitrate 7000,--preset slower --no-cutree 
--analysis-mode=load --bitrate 7000
-BasketballDrive_1920x1080_50.y4m,--preset veryslow --crf 4 --cu-lossless 
--pmode --limit-refs 1 --aq-mode 3 --limit-tu 1
-BasketballDrive_1920x1080_50.y4m,--preset veryslow --no-cutree 
--analysis-mode=save --bitrate 7000 --tskip-fast,--preset veryslow --no-cutree 
--analysis-mode=load --bitrate 7000  --tskip-fast
+BasketballDrive_1920x1080_50.y4m,--preset veryslow --crf 4 --cu-lossless 
--pmode --limit-refs 1 --aq-mode 3 --limit-tu 3
+BasketballDrive_1920x1080_50.y4m,--preset veryslow --no-cutree 
--analysis-mode=save --bitrate 7000 --tskip-fast --limit-tu 4,--preset veryslow 
--no-cutree --analysis-mode=load --bitrate 7000  --tskip-fast --limit-tu 4
 BasketballDrive_1920x1080_50.y4m,--preset veryslow --recon-y4m-exec "ffplay -i 
pipe:0 -autoexit"
 Coastguard-4k.y4m,--preset ultrafast --recon-y4m-exec "ffplay -i pipe:0 
-autoexit"
 Coastguard-4k.y4m,--preset superfast --tune grain --overscan=crop
@@ -47,7 +47,7 @@
 DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset slow --temporal-layers 
--no-psy-rd --qg-size 32 --limit-refs 0 --cu-lossless
 DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset veryfast --weightp --nr-intra 
1000 -F4
 DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset medium --nr-inter 500 -F4 
--no-psy-rdoq
-DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset slower --no-weightp 
--rdoq-level 0 --limit-refs 3
+DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset slower --no-weightp 
--rdoq-level 0 --limit-refs 3 --tu-inter-depth 4 --limit-tu 3
 DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset fast --no-cutree 
--analysis-mode=save --bitrate 3000 --early-skip --tu-inter-depth 3 --limit-tu 
1,--preset fast --no-cutree --analysis-mode=load --bitrate 3000 --early-skip 
--tu-inter-depth 3 --limit-tu 1
 FourPeople_1280x720_60.y4m,--preset superfast --no-wpp --lookahead-slices 2
 FourPeople_1280x720_60.y4m,--preset veryfast --aq-mode 2 --aq-strength 1.5 
--qg-size 8
@@ -80,7 +80,7 @@
 RaceHorses_416x240_30.y4m,--preset superfast --no-cutree
 RaceHorses_416x240_30.y4m,--preset medium --tskip-fast --tskip
 RaceHorses_416x240_30.y4m,--preset slower --keyint -1 --rdoq-level 0 
--limit-tu 2
-RaceHorses_416x240_30.y4m,--preset veryslow --tskip-fast --tskip --limit-refs 
3 --limit-tu 2
+RaceHorses_416x240_30.y4m,--preset veryslow --tskip-fast --tskip --limit-refs 
3 --limit-tu 3
 RaceHorses_416x240_30_10bit.yuv,--preset ultrafast --tune psnr --limit-refs 1
 RaceHorses_416x240_30_10bit.yuv,--preset veryfast --weightb
 RaceHorses_416x240_30_10bit.yuv,--preset faster --rdoq-level 0 --dither
@@ -110,7 +110,7 @@
 ducks_take_off_420_720p50.y4m,--preset veryslow --constrained-intra --bframes 2
 mobile_calendar_422_ntsc.y4m,--preset superfast --weightp
 mobile_calendar_422_ntsc.y4m,--preset medium --bitrate 500 -F4
-mobile_calendar_422_ntsc.y4m,--preset slower --tskip --tskip-fast --limit-tu 2
+mobile_calendar_422_ntsc.y4m,--preset slower --tskip --tskip-fast --limit-tu 4
 mobile_calendar_422_ntsc.y4m,--preset veryslow --tskip --limit-refs 2
 old_town_cross_444_720p50.y4m,--preset ultrafast --weightp --min-cu 32
 old_town_cross_444_720p50.y4m,--preset superfast --weightp --min-cu 16 
--limit-modes
@@ -120,7 +120,7 @@
 old_town_cross_444_720p50.y4m,--preset medium --keyint -1 --no-weightp --ref 6
 old_town_cross_444_720p50.y4m,--preset slow --rdoq-level 1 --early-skip --ref 
7 --no-b-pyramid
 old_town_cross_444_720p50.y4m,--preset slower --crf 4 --cu-lossless
-old_town_cross_444_720p50.y4m,--preset veryslow --max-tu-size 4 --min-cu-size 
32 --limit-tu 1
+old_town_cross_444_720p50.y4m,--preset veryslow --max-tu-size 4 --min-cu-size 
32 --limit-tu 4
 parkrun_ter_720p50.y4m,--preset medium --no-open-gop --sao-non-deblock --crf 4 
--cu-lossless
 parkrun_ter_720p50.y4m,--preset 

[x265] [PATCH 1 of 4] limitTU : modify condition for limitTU 1 and 2

2016-11-18 Thread bhavna
# HG changeset patch
# User Bhavna Hariharan 
# Date 1479449945 -19800
#  Fri Nov 18 11:49:05 2016 +0530
# Node ID c5295126f248411481a8361acfd2bc8b0636cedc
# Parent  4c1652f3884fba9fab4c589dd057b12e6bf33d5b
limitTU : modify condition for limitTU 1 and 2

diff -r 4c1652f3884f -r c5295126f248 source/encoder/search.cpp
--- a/source/encoder/search.cpp Tue Nov 15 11:16:04 2016 +0530
+++ b/source/encoder/search.cpp Fri Nov 18 11:49:05 2016 +0530
@@ -94,6 +94,15 @@
 uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift);
 uint32_t numPartitions = 1 << (maxLog2CUSize - LOG2_UNIT_SIZE) * 2;
 
+m_limitTU = 0;
+if (m_param->limitTU)
+{
+if (m_param->limitTU == 1)
+m_limitTU = X265_TU_LIMIT_BFS;
+else if (m_param->limitTU == 2)
+m_limitTU = X265_TU_LIMIT_DFS;
+}
+
 /* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 
2=16x16, 3=32x32
  * the coeffRQT and reconQtYuv are allocated to the max CU size at every 
depth. The parts
  * which are reconstructed at each depth are valid. At the end, the 
transform depth table
@@ -2621,9 +2630,9 @@
 
 m_entropyCoder.load(m_rqt[depth].cur);
 
-if (m_param->limitTU == X265_TU_LIMIT_DFS)
+if (m_param->limitTU & X265_TU_LIMIT_DFS)
 m_maxTUDepth = 0;
-else if (m_param->limitTU == X265_TU_LIMIT_BFS)
+else if (m_param->limitTU & X265_TU_LIMIT_BFS)
 memset(_cacheTU, 0, sizeof(TUInfoCache));
 
 Cost costs;
@@ -2886,7 +2895,7 @@
 uint32_t ycbf = 0, ucbf = 0, vcbf = 0;
 for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx 
+= qNumParts)
 {
-if (m_param->limitTU == X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1)
+if ((m_param->limitTU & X265_TU_LIMIT_DFS) && tuDepth == 0 && qIdx == 
1)
 {
 // Fetch maximum TU depth of first sub partition to limit 
recursion of others
 for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++)
@@ -2937,12 +2946,12 @@
 bool bSaveTUData = false, bLoadTUData = false;
 uint32_t idx = 0;
 
-if (m_param->limitTU == X265_TU_LIMIT_DFS && m_maxTUDepth)
+if ((m_param->limitTU & X265_TU_LIMIT_DFS) && m_maxTUDepth)
 {
 uint32_t log2MaxTrSize = cuGeom.log2CUSize - m_maxTUDepth;
 bCheckSplit = log2TrSize > log2MaxTrSize;
 }
-else if (m_param->limitTU == X265_TU_LIMIT_BFS && splitMore >= 0)
+else if ((m_param->limitTU & X265_TU_LIMIT_BFS) && splitMore >= 0)
 {
 if (bCheckSplit && bCheckFull && tuDepth)
 {
@@ -3488,7 +3497,7 @@
 {
 if (splitCost.rdcost < fullCost.rdcost)
 {
-if (m_param->limitTU == X265_TU_LIMIT_BFS)
+if (m_param->limitTU & X265_TU_LIMIT_BFS)
 {
 uint32_t nextlog2TrSize = cuGeom.log2CUSize - (tuDepth + 
1);
 bool nextSplit = nextlog2TrSize > depthRange[0];
diff -r 4c1652f3884f -r c5295126f248 source/encoder/search.h
--- a/source/encoder/search.h   Tue Nov 15 11:16:04 2016 +0530
+++ b/source/encoder/search.h   Fri Nov 18 11:49:05 2016 +0530
@@ -276,7 +276,9 @@
 bool m_bFrameParallel;
 uint32_t m_numLayers;
 uint32_t m_refLagPixels;
+
 uint32_t m_maxTUDepth;
+uint16_t m_limitTU;
 
 int16_t m_sliceMaxY;
 int16_t m_sliceMinY;
___
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel


[x265] [PATCH 2 of 4] limitTU : use spatial and temporal CUs' TU depth to limit recursion

2016-11-18 Thread bhavna
# HG changeset patch
# User Bhavna Hariharan 
# Date 1479450968 -19800
#  Fri Nov 18 12:06:08 2016 +0530
# Node ID 40a0a322b26fc0516a72d4de9a941e18b5bb97b9
# Parent  c5295126f248411481a8361acfd2bc8b0636cedc
limitTU : use spatial and temporal CUs' TU depth to limit recursion

diff -r c5295126f248 -r 40a0a322b26f doc/reST/cli.rst
--- a/doc/reST/cli.rst  Fri Nov 18 11:49:05 2016 +0530
+++ b/doc/reST/cli.rst  Fri Nov 18 12:06:08 2016 +0530
@@ -869,13 +869,15 @@
partitions, in which case a TU split is implied and thus the
residual quad-tree begins one layer below the CU quad-tree.
 
-.. option:: --limit-tu <0|1|2>
+.. option:: --limit-tu <0..3>
 
Enables early exit from TU depth recursion, for inter coded blocks.
Level 1 - decides to recurse to next higher depth based on cost 
comparison of full size TU and split TU.
Level 2 - based on first split subTU's depth, limits recursion of
other split subTUs.
+   Level 3 - based on the average depth of the co-located and the neighbor
+   CUs' TU depth, limits recursion of the current CU.
 
Default: 0
 
diff -r c5295126f248 -r 40a0a322b26f source/common/cudata.cpp
--- a/source/common/cudata.cpp  Fri Nov 18 11:49:05 2016 +0530
+++ b/source/common/cudata.cpp  Fri Nov 18 12:06:08 2016 +0530
@@ -296,6 +296,9 @@
 /* initialize the remaining CU data in one memset */
 memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? 
BytesPerPartition - 11 : BytesPerPartition - 7) * m_numPartitions);
 
+for (int8_t i = 0; i < NUM_TU_DEPTH; i++)
+m_refTuDepth[i] = -1;
+
 uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
 m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : 
NULL;
 m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? 
m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
diff -r c5295126f248 -r 40a0a322b26f source/common/cudata.h
--- a/source/common/cudata.h   Fri Nov 18 11:49:05 2016 +0530
+++ b/source/common/cudata.h   Fri Nov 18 12:06:08 2016 +0530
@@ -28,6 +28,8 @@
 #include "slice.h"
 #include "mv.h"
 
+#define NUM_TU_DEPTH 21
+
 namespace X265_NS {
 // private namespace
 
@@ -204,6 +206,7 @@
 enum { BytesPerPartition = 21 };  // combined sizeof() of all per-part data
 
 coeff_t*  m_trCoeff[3];   // transformed coefficient buffer per 
plane
+int8_t m_refTuDepth[NUM_TU_DEPTH];   // TU depth of CU at depths 0, 
1 and 2
 
 MV*   m_mv[2];// array of motion vectors per list
 MV*   m_mvd[2];   // array of coded motion vector deltas 
per list
diff -r c5295126f248 -r 40a0a322b26f source/common/param.cpp
--- a/source/common/param.cpp   Fri Nov 18 11:49:05 2016 +0530
+++ b/source/common/param.cpp   Fri Nov 18 12:06:08 2016 +0530
@@ -1126,7 +1126,7 @@
   "QuadtreeTUMaxDepthInter must be less than or equal to the 
difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1");
 CHECK((param->maxTUSize != 32 && param->maxTUSize != 16 && 
param->maxTUSize != 8 && param->maxTUSize != 4),
   "max TU size must be 4, 8, 16, or 32");
-CHECK(param->limitTU > 2, "Invalid limit-tu option, limit-TU must be 0, 1 
or 2");
+CHECK(param->limitTU > 3, "Invalid limit-tu option, limit-TU must be 
between 0 and 3");
 CHECK(param->maxNumMergeCand < 1, "MaxNumMergeCand must be 1 or greater.");
 CHECK(param->maxNumMergeCand > 5, "MaxNumMergeCand must be 5 or smaller.");
 
diff -r c5295126f248 -r 40a0a322b26f source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp   Fri Nov 18 11:49:05 2016 +0530
+++ b/source/encoder/analysis.cpp   Fri Nov 18 12:06:08 2016 +0530
@@ -203,6 +203,57 @@
 return *m_modeDepth[0].bestMode;
 }
 
+int32_t Analysis::loadTUDepth(CUGeom cuGeom, CUData parentCTU)
+{
+float predDepth = 0;
+CUData* neighbourCU;
+uint8_t count = 0;
+int32_t maxTUDepth = -1;
+neighbourCU = m_slice->m_refFrameList[0][0]->m_encData->m_picCTU;
+predDepth += neighbourCU->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+if (m_slice->isInterB())
+{
+neighbourCU = m_slice->m_refFrameList[1][0]->m_encData->m_picCTU;
+predDepth += neighbourCU->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+}
+if (parentCTU.m_cuAbove)
+{
+predDepth += parentCTU.m_cuAbove->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+if (parentCTU.m_cuAboveLeft)
+{
+predDepth += 
parentCTU.m_cuAboveLeft->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+}
+if (parentCTU.m_cuAboveRight)
+{
+predDepth += 
parentCTU.m_cuAboveRight->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+}
+}
+if (parentCTU.m_cuLeft)
+{
+predDepth += parentCTU.m_cuLeft->m_refTuDepth[cuGeom.geomRecurId];
+count++;
+}
+predDepth /= count;
+
+if (predDepth == 0)
+