[x265] [PATCH 3 of 4] limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth
# HG changeset patch # User Bhavna Hariharan# Date 1479454023 -19800 # Fri Nov 18 12:57:03 2016 +0530 # Node ID df9a0c94631c9abac75e5a7cf4bd48ff55adced8 # Parent 40a0a322b26fc0516a72d4de9a941e18b5bb97b9 limitTU : use neighbouring CUs' TU depth to limit 1st subTU's depth diff -r 40a0a322b26f -r df9a0c94631c doc/reST/cli.rst --- a/doc/reST/cli.rst Fri Nov 18 12:06:08 2016 +0530 +++ b/doc/reST/cli.rst Fri Nov 18 12:57:03 2016 +0530 @@ -869,7 +869,7 @@ partitions, in which case a TU split is implied and thus the residual quad-tree begins one layer below the CU quad-tree. -.. option:: --limit-tu <0..3> +.. option:: --limit-tu <0..4> Enables early exit from TU depth recursion, for inter coded blocks. Level 1 - decides to recurse to next higher depth based on cost @@ -878,6 +878,9 @@ other split subTUs. Level 3 - based on the average depth of the co-located and the neighbor CUs' TU depth, limits recursion of the current CU. + Level 4 - uses the depth of the neighbouring/ co-located CUs TU depth + to limit the 1st subTU depth. The 1st subTU depth is taken as the + limiting depth for the other subTUs. 
Default: 0 diff -r 40a0a322b26f -r df9a0c94631c source/common/param.cpp --- a/source/common/param.cpp Fri Nov 18 12:06:08 2016 +0530 +++ b/source/common/param.cpp Fri Nov 18 12:57:03 2016 +0530 @@ -1126,7 +1126,7 @@ "QuadtreeTUMaxDepthInter must be less than or equal to the difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1"); CHECK((param->maxTUSize != 32 && param->maxTUSize != 16 && param->maxTUSize != 8 && param->maxTUSize != 4), "max TU size must be 4, 8, 16, or 32"); -CHECK(param->limitTU > 3, "Invalid limit-tu option, limit-TU must be between 0 and 3"); +CHECK(param->limitTU > 4, "Invalid limit-tu option, limit-TU must be between 0 and 4"); CHECK(param->maxNumMergeCand < 1, "MaxNumMergeCand must be 1 or greater."); CHECK(param->maxNumMergeCand > 5, "MaxNumMergeCand must be 5 or smaller."); diff -r 40a0a322b26f -r df9a0c94631c source/encoder/search.cpp --- a/source/encoder/search.cpp Fri Nov 18 12:06:08 2016 +0530 +++ b/source/encoder/search.cpp Fri Nov 18 12:57:03 2016 +0530 @@ -103,6 +103,8 @@ m_limitTU = X265_TU_LIMIT_DFS; else if (m_param->limitTU == 3) m_limitTU = X265_TU_LIMIT_NEIGH; +else if (m_param->limitTU == 4) +m_limitTU = X265_TU_LIMIT_DFS + X265_TU_LIMIT_NEIGH; } /* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 2=16x16, 3=32x32 @@ -2638,13 +2640,20 @@ m_entropyCoder.load(m_rqt[depth].cur); -if (m_limitTU & X265_TU_LIMIT_DFS) +if ((m_limitTU & X265_TU_LIMIT_DFS) && !(m_limitTU & X265_TU_LIMIT_NEIGH)) m_maxTUDepth = -1; else if (m_limitTU & X265_TU_LIMIT_BFS) memset(_cacheTU, 0, sizeof(TUInfoCache)); Cost costs; -estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange); +if ((m_limitTU & X265_TU_LIMIT_DFS) && (m_limitTU & X265_TU_LIMIT_NEIGH)) +{ +int32_t tempDepth = m_maxTUDepth; +estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange); +m_maxTUDepth = tempDepth; +} +else +estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange); uint32_t tqBypass = 
cu.m_tqBypass[0]; if (!tqBypass) @@ -2905,8 +2914,9 @@ { if ((m_limitTU & X265_TU_LIMIT_DFS) && tuDepth == 0 && qIdx == 1) { +m_maxTUDepth = cu.m_tuDepth[0]; // Fetch maximum TU depth of first sub partition to limit recursion of others -for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++) +for (uint32_t i = 1; i < cuGeom.numPartitions / 4; i++) m_maxTUDepth = X265_MAX(m_maxTUDepth, cu.m_tuDepth[i]); } estimateResidualQT(mode, cuGeom, qPartIdx, tuDepth + 1, resiYuv, splitCost, depthRange, splitMore); ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 4 of 4] tests: update command lines to cover limitTU 3 and 4
# HG changeset patch # User Bhavna Hariharan# Date 1479302428 -19800 # Wed Nov 16 18:50:28 2016 +0530 # Node ID b476a6420b1ccfebe53bee4ed715b556f443e7f6 # Parent df9a0c94631c9abac75e5a7cf4bd48ff55adced8 tests: update command lines to cover limitTU 3 and 4 diff -r df9a0c94631c -r b476a6420b1c source/test/regression-tests.txt --- a/source/test/regression-tests.txt Fri Nov 18 12:57:03 2016 +0530 +++ b/source/test/regression-tests.txt Wed Nov 16 18:50:28 2016 +0530 @@ -19,10 +19,10 @@ BasketballDrive_1920x1080_50.y4m,--preset medium --keyint -1 --nr-inter 100 -F4 --no-sao BasketballDrive_1920x1080_50.y4m,--preset medium --no-cutree --analysis-mode=save --bitrate 7000 --limit-modes,--preset medium --no-cutree --analysis-mode=load --bitrate 7000 --limit-modes BasketballDrive_1920x1080_50.y4m,--preset slow --nr-intra 100 -F4 --aq-strength 3 --qg-size 16 --limit-refs 1 -BasketballDrive_1920x1080_50.y4m,--preset slower --lossless --chromaloc 3 --subme 0 +BasketballDrive_1920x1080_50.y4m,--preset slower --lossless --chromaloc 3 --subme 0 --limit-tu 4 BasketballDrive_1920x1080_50.y4m,--preset slower --no-cutree --analysis-mode=save --bitrate 7000,--preset slower --no-cutree --analysis-mode=load --bitrate 7000 -BasketballDrive_1920x1080_50.y4m,--preset veryslow --crf 4 --cu-lossless --pmode --limit-refs 1 --aq-mode 3 --limit-tu 1 -BasketballDrive_1920x1080_50.y4m,--preset veryslow --no-cutree --analysis-mode=save --bitrate 7000 --tskip-fast,--preset veryslow --no-cutree --analysis-mode=load --bitrate 7000 --tskip-fast +BasketballDrive_1920x1080_50.y4m,--preset veryslow --crf 4 --cu-lossless --pmode --limit-refs 1 --aq-mode 3 --limit-tu 3 +BasketballDrive_1920x1080_50.y4m,--preset veryslow --no-cutree --analysis-mode=save --bitrate 7000 --tskip-fast --limit-tu 4,--preset veryslow --no-cutree --analysis-mode=load --bitrate 7000 --tskip-fast --limit-tu 4 BasketballDrive_1920x1080_50.y4m,--preset veryslow --recon-y4m-exec "ffplay -i pipe:0 -autoexit" Coastguard-4k.y4m,--preset 
ultrafast --recon-y4m-exec "ffplay -i pipe:0 -autoexit" Coastguard-4k.y4m,--preset superfast --tune grain --overscan=crop @@ -47,7 +47,7 @@ DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset slow --temporal-layers --no-psy-rd --qg-size 32 --limit-refs 0 --cu-lossless DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset veryfast --weightp --nr-intra 1000 -F4 DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset medium --nr-inter 500 -F4 --no-psy-rdoq -DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset slower --no-weightp --rdoq-level 0 --limit-refs 3 +DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset slower --no-weightp --rdoq-level 0 --limit-refs 3 --tu-inter-depth 4 --limit-tu 3 DucksAndLegs_1920x1080_60_10bit_422.yuv,--preset fast --no-cutree --analysis-mode=save --bitrate 3000 --early-skip --tu-inter-depth 3 --limit-tu 1,--preset fast --no-cutree --analysis-mode=load --bitrate 3000 --early-skip --tu-inter-depth 3 --limit-tu 1 FourPeople_1280x720_60.y4m,--preset superfast --no-wpp --lookahead-slices 2 FourPeople_1280x720_60.y4m,--preset veryfast --aq-mode 2 --aq-strength 1.5 --qg-size 8 @@ -80,7 +80,7 @@ RaceHorses_416x240_30.y4m,--preset superfast --no-cutree RaceHorses_416x240_30.y4m,--preset medium --tskip-fast --tskip RaceHorses_416x240_30.y4m,--preset slower --keyint -1 --rdoq-level 0 --limit-tu 2 -RaceHorses_416x240_30.y4m,--preset veryslow --tskip-fast --tskip --limit-refs 3 --limit-tu 2 +RaceHorses_416x240_30.y4m,--preset veryslow --tskip-fast --tskip --limit-refs 3 --limit-tu 3 RaceHorses_416x240_30_10bit.yuv,--preset ultrafast --tune psnr --limit-refs 1 RaceHorses_416x240_30_10bit.yuv,--preset veryfast --weightb RaceHorses_416x240_30_10bit.yuv,--preset faster --rdoq-level 0 --dither @@ -110,7 +110,7 @@ ducks_take_off_420_720p50.y4m,--preset veryslow --constrained-intra --bframes 2 mobile_calendar_422_ntsc.y4m,--preset superfast --weightp mobile_calendar_422_ntsc.y4m,--preset medium --bitrate 500 -F4 -mobile_calendar_422_ntsc.y4m,--preset slower --tskip --tskip-fast 
--limit-tu 2 +mobile_calendar_422_ntsc.y4m,--preset slower --tskip --tskip-fast --limit-tu 4 mobile_calendar_422_ntsc.y4m,--preset veryslow --tskip --limit-refs 2 old_town_cross_444_720p50.y4m,--preset ultrafast --weightp --min-cu 32 old_town_cross_444_720p50.y4m,--preset superfast --weightp --min-cu 16 --limit-modes @@ -120,7 +120,7 @@ old_town_cross_444_720p50.y4m,--preset medium --keyint -1 --no-weightp --ref 6 old_town_cross_444_720p50.y4m,--preset slow --rdoq-level 1 --early-skip --ref 7 --no-b-pyramid old_town_cross_444_720p50.y4m,--preset slower --crf 4 --cu-lossless -old_town_cross_444_720p50.y4m,--preset veryslow --max-tu-size 4 --min-cu-size 32 --limit-tu 1 +old_town_cross_444_720p50.y4m,--preset veryslow --max-tu-size 4 --min-cu-size 32 --limit-tu 4 parkrun_ter_720p50.y4m,--preset medium --no-open-gop --sao-non-deblock --crf 4 --cu-lossless parkrun_ter_720p50.y4m,--preset
[x265] [PATCH 1 of 4] limitTU : modify condition for limitTU 1 and 2
# HG changeset patch # User Bhavna Hariharan# Date 1479449945 -19800 # Fri Nov 18 11:49:05 2016 +0530 # Node ID c5295126f248411481a8361acfd2bc8b0636cedc # Parent 4c1652f3884fba9fab4c589dd057b12e6bf33d5b limitTU : modify condition for limitTU 1 and 2 diff -r 4c1652f3884f -r c5295126f248 source/encoder/search.cpp --- a/source/encoder/search.cpp Tue Nov 15 11:16:04 2016 +0530 +++ b/source/encoder/search.cpp Fri Nov 18 11:49:05 2016 +0530 @@ -94,6 +94,15 @@ uint32_t sizeC = sizeL >> (m_hChromaShift + m_vChromaShift); uint32_t numPartitions = 1 << (maxLog2CUSize - LOG2_UNIT_SIZE) * 2; +m_limitTU = 0; +if (m_param->limitTU) +{ +if (m_param->limitTU == 1) +m_limitTU = X265_TU_LIMIT_BFS; +else if (m_param->limitTU == 2) +m_limitTU = X265_TU_LIMIT_DFS; +} + /* these are indexed by qtLayer (log2size - 2) so nominally 0=4x4, 1=8x8, 2=16x16, 3=32x32 * the coeffRQT and reconQtYuv are allocated to the max CU size at every depth. The parts * which are reconstructed at each depth are valid. At the end, the transform depth table @@ -2621,9 +2630,9 @@ m_entropyCoder.load(m_rqt[depth].cur); -if (m_param->limitTU == X265_TU_LIMIT_DFS) +if (m_param->limitTU & X265_TU_LIMIT_DFS) m_maxTUDepth = 0; -else if (m_param->limitTU == X265_TU_LIMIT_BFS) +else if (m_param->limitTU & X265_TU_LIMIT_BFS) memset(_cacheTU, 0, sizeof(TUInfoCache)); Cost costs; @@ -2886,7 +2895,7 @@ uint32_t ycbf = 0, ucbf = 0, vcbf = 0; for (uint32_t qIdx = 0, qPartIdx = absPartIdx; qIdx < 4; ++qIdx, qPartIdx += qNumParts) { -if (m_param->limitTU == X265_TU_LIMIT_DFS && tuDepth == 0 && qIdx == 1) +if ((m_param->limitTU & X265_TU_LIMIT_DFS) && tuDepth == 0 && qIdx == 1) { // Fetch maximum TU depth of first sub partition to limit recursion of others for (uint32_t i = 0; i < cuGeom.numPartitions / 4; i++) @@ -2937,12 +2946,12 @@ bool bSaveTUData = false, bLoadTUData = false; uint32_t idx = 0; -if (m_param->limitTU == X265_TU_LIMIT_DFS && m_maxTUDepth) +if ((m_param->limitTU & X265_TU_LIMIT_DFS) && m_maxTUDepth) { uint32_t 
log2MaxTrSize = cuGeom.log2CUSize - m_maxTUDepth; bCheckSplit = log2TrSize > log2MaxTrSize; } -else if (m_param->limitTU == X265_TU_LIMIT_BFS && splitMore >= 0) +else if ((m_param->limitTU & X265_TU_LIMIT_BFS) && splitMore >= 0) { if (bCheckSplit && bCheckFull && tuDepth) { @@ -3488,7 +3497,7 @@ { if (splitCost.rdcost < fullCost.rdcost) { -if (m_param->limitTU == X265_TU_LIMIT_BFS) +if (m_param->limitTU & X265_TU_LIMIT_BFS) { uint32_t nextlog2TrSize = cuGeom.log2CUSize - (tuDepth + 1); bool nextSplit = nextlog2TrSize > depthRange[0]; diff -r 4c1652f3884f -r c5295126f248 source/encoder/search.h --- a/source/encoder/search.h Tue Nov 15 11:16:04 2016 +0530 +++ b/source/encoder/search.h Fri Nov 18 11:49:05 2016 +0530 @@ -276,7 +276,9 @@ bool m_bFrameParallel; uint32_t m_numLayers; uint32_t m_refLagPixels; + uint32_t m_maxTUDepth; +uint16_t m_limitTU; int16_t m_sliceMaxY; int16_t m_sliceMinY; ___ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel
[x265] [PATCH 2 of 4] limitTU : use spatial and temporal CUs' TU depth to limit recursion
# HG changeset patch # User Bhavna Hariharan# Date 1479450968 -19800 # Fri Nov 18 12:06:08 2016 +0530 # Node ID 40a0a322b26fc0516a72d4de9a941e18b5bb97b9 # Parent c5295126f248411481a8361acfd2bc8b0636cedc limitTU : use spatial and temporal CUs' TU depth to limit recursion diff -r c5295126f248 -r 40a0a322b26f doc/reST/cli.rst --- a/doc/reST/cli.rst Fri Nov 18 11:49:05 2016 +0530 +++ b/doc/reST/cli.rst Fri Nov 18 12:06:08 2016 +0530 @@ -869,13 +869,15 @@ partitions, in which case a TU split is implied and thus the residual quad-tree begins one layer below the CU quad-tree. -.. option:: --limit-tu <0|1|2> +.. option:: --limit-tu <0..3> Enables early exit from TU depth recursion, for inter coded blocks. Level 1 - decides to recurse to next higher depth based on cost comparison of full size TU and split TU. Level 2 - based on first split subTU's depth, limits recursion of other split subTUs. + Level 3 - based on the average depth of the co-located and the neighbor + CUs' TU depth, limits recursion of the current CU. Default: 0 diff -r c5295126f248 -r 40a0a322b26f source/common/cudata.cpp --- a/source/common/cudata.cpp Fri Nov 18 11:49:05 2016 +0530 +++ b/source/common/cudata.cpp Fri Nov 18 12:06:08 2016 +0530 @@ -296,6 +296,9 @@ /* initialize the remaining CU data in one memset */ memset(m_cuDepth, 0, (frame.m_param->internalCsp == X265_CSP_I400 ? BytesPerPartition - 11 : BytesPerPartition - 7) * m_numPartitions); +for (int8_t i = 0; i < NUM_TU_DEPTH; i++) +m_refTuDepth[i] = -1; + uint32_t widthInCU = m_slice->m_sps->numCuInWidth; m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL; m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? 
m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL; diff -r c5295126f248 -r 40a0a322b26f source/common/cudata.h --- a/source/common/cudata.h Fri Nov 18 11:49:05 2016 +0530 +++ b/source/common/cudata.h Fri Nov 18 12:06:08 2016 +0530 @@ -28,6 +28,8 @@ #include "slice.h" #include "mv.h" +#define NUM_TU_DEPTH 21 + namespace X265_NS { // private namespace @@ -204,6 +206,7 @@ enum { BytesPerPartition = 21 }; // combined sizeof() of all per-part data coeff_t* m_trCoeff[3]; // transformed coefficient buffer per plane +int8_t m_refTuDepth[NUM_TU_DEPTH]; // TU depth of CU at depths 0, 1 and 2 MV* m_mv[2]; // array of motion vectors per list MV* m_mvd[2]; // array of coded motion vector deltas per list diff -r c5295126f248 -r 40a0a322b26f source/common/param.cpp --- a/source/common/param.cpp Fri Nov 18 11:49:05 2016 +0530 +++ b/source/common/param.cpp Fri Nov 18 12:06:08 2016 +0530 @@ -1126,7 +1126,7 @@ "QuadtreeTUMaxDepthInter must be less than or equal to the difference between log2(maxCUSize) and QuadtreeTULog2MinSize plus 1"); CHECK((param->maxTUSize != 32 && param->maxTUSize != 16 && param->maxTUSize != 8 && param->maxTUSize != 4), "max TU size must be 4, 8, 16, or 32"); -CHECK(param->limitTU > 2, "Invalid limit-tu option, limit-TU must be 0, 1 or 2"); +CHECK(param->limitTU > 3, "Invalid limit-tu option, limit-TU must be between 0 and 3"); CHECK(param->maxNumMergeCand < 1, "MaxNumMergeCand must be 1 or greater."); CHECK(param->maxNumMergeCand > 5, "MaxNumMergeCand must be 5 or smaller."); diff -r c5295126f248 -r 40a0a322b26f source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp Fri Nov 18 11:49:05 2016 +0530 +++ b/source/encoder/analysis.cpp Fri Nov 18 12:06:08 2016 +0530 @@ -203,6 +203,57 @@ return *m_modeDepth[0].bestMode; } +int32_t Analysis::loadTUDepth(CUGeom cuGeom, CUData parentCTU) +{ +float predDepth = 0; +CUData* neighbourCU; +uint8_t count = 0; +int32_t maxTUDepth = -1; +neighbourCU = m_slice->m_refFrameList[0][0]->m_encData->m_picCTU; +predDepth +=
neighbourCU->m_refTuDepth[cuGeom.geomRecurId]; +count++; +if (m_slice->isInterB()) +{ +neighbourCU = m_slice->m_refFrameList[1][0]->m_encData->m_picCTU; +predDepth += neighbourCU->m_refTuDepth[cuGeom.geomRecurId]; +count++; +} +if (parentCTU.m_cuAbove) +{ +predDepth += parentCTU.m_cuAbove->m_refTuDepth[cuGeom.geomRecurId]; +count++; +if (parentCTU.m_cuAboveLeft) +{ +predDepth += parentCTU.m_cuAboveLeft->m_refTuDepth[cuGeom.geomRecurId]; +count++; +} +if (parentCTU.m_cuAboveRight) +{ +predDepth += parentCTU.m_cuAboveRight->m_refTuDepth[cuGeom.geomRecurId]; +count++; +} +} +if (parentCTU.m_cuLeft) +{ +predDepth += parentCTU.m_cuLeft->m_refTuDepth[cuGeom.geomRecurId]; +count++; +} +predDepth /= count; + +if (predDepth == 0) +