[x265] [PATCH] remove maxCTU size restriction in scaled save/load encodes
# HG changeset patch # User Bhavna Hariharan # Date 1519796358 -19800 # Wed Feb 28 11:09:18 2018 +0530 # Node ID 55eb3992299530de882829de0d3c0fea6d58b70d # Parent 7219376de42a1cc378ec957c886b511139d3c201 remove maxCTU size restriction in scaled save/load encodes The scaled save/load feature requires that the save encode has a maximum CTU size of 32. The 32x32 blocks are mapped to a 64x64 block in load encode. Due to this restriction we will be able to hierarchically encode only 3 resolutions. WxH - ctu 16 2Wx2H - ctu 32 4Wx4H - ctu 64 diff -r 7219376de42a -r 55eb39922995 source/encoder/encoder.cpp --- a/source/encoder/encoder.cppThu Feb 15 02:21:26 2018 -0800 +++ b/source/encoder/encoder.cppWed Feb 28 11:09:18 2018 +0530 @@ -3272,10 +3272,10 @@ #define X265_FREAD(val, size, readSize, fileOffset, src)\ if (!m_param->bUseAnalysisFile)\ -{\ +{\ memcpy(val, src, (size * readSize));\ -}\ -else if (fread(val, size, readSize, fileOffset) != readSize)\ +}\ +else if (fread(val, size, readSize, fileOffset) != readSize)\ {\ x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data\n");\ freeAnalysis(analysis);\ @@ -3334,10 +3334,37 @@ int scaledNumPartition = analysis->numPartitions; int factor = 1 << m_param->scaleFactor; +int numPartitions = analysis->numPartitions; +int numCUsInFrame = analysis->numCUsInFrame; +int extendedWidth, extendedHeight; +cuLocation cuLoc; +cuLoc.heightInCU = (m_param->sourceHeight + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; +cuLoc.widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; +cuLoc.skipHeight = false; +cuLoc.skipWidth = false; + if (m_param->scaleFactor) -analysis->numPartitions *= factor; +{ +/* Allocate memory for scaled resoultion's numPartitions and numCUsInFrame*/ +analysis->numPartitions = m_param->num4x4Partitions; +analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU; + +/* Set skipWidth/skipHeight flags when the out of bound pixels in lowRes is greater than half of maxCUSize */ 
+extendedWidth = ((m_param->sourceWidth / 2 + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize; +extendedHeight = ((m_param->sourceHeight / 2 + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize; +uint32_t outOfBoundaryLowres = extendedWidth - m_param->sourceWidth / 2; +if (outOfBoundaryLowres * 2 >= m_param->maxCUSize) +cuLoc.skipWidth = true; +uint32_t outOfBoundaryLowresH = extendedHeight - m_param->sourceHeight / 2; +if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize) +cuLoc.skipHeight = true; +} + /* Memory is allocated for inter and intra analysis data based on the slicetype */ allocAnalysis(analysis); + +analysis->numPartitions = numPartitions * factor; +analysis->numCUsInFrame = numCUsInFrame; if (m_param->bDisableLookahead && m_rateControl->m_isVbv) { X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.intraVbvCost); @@ -3345,6 +3372,11 @@ X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv); X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.intraSatdForVbv); } + +cuLoc.evenRowIndex = 0; +cuLoc.oddRowIndex = m_param->num4x4Partitions * cuLoc.widthInCU; +cuLoc.switchCondition = 0; // To switch between odd and even rows + if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) { if (m_param->analysisReuseLevel < 2) @@ -3365,17 +3397,30 @@ for (uint32_t d = 0; d < depthBytes; d++) { int bytes = analysis->numPartitions >> (depthBuf[d] * 2); +int numCTUCopied = 1; + if (m_param->scaleFactor) { -if (depthBuf[d] == 0) -depthBuf[d] = 1; +if (!depthBuf[d]) //copy data of one 64x64 to four scaled 64x64 CTUs. 
+{ +bytes /= 4; +numCTUCopied = 4; +} + if (partSizes[d] == SIZE_NxN) partSizes[d] = SIZE_2Nx2N; +if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) || (depthBuf[d] && m_param->maxCUSize != 64)) +depthBuf[d]--; } -memset(&((analysis_intra_data *)analysis->intraData)->depth[count], depthBuf[d], bytes); -memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count], modeBuf[d], bytes); -memset(&((analysis_intra_data *)analysis->intraData)->partSizes[count], par
[x265] [PATCH] remove maxCTU size restriction in scaled save/load encodes
# HG changeset patch # User Bhavna Hariharan # Date 1519796358 -19800 # Wed Feb 28 11:09:18 2018 +0530 # Node ID cf543136cbd0dd87e53bbee90358f157a47005ae # Parent 0b781d592c8e6e0917dc5f152129bebb201e529d remove maxCTU size restriction in scaled save/load encodes The scaled save/load feature requires that the save encode has a maximum CTU size of 32. The 32x32 blocks are mapped to a 64x64 block in load encode. Due to this restriction we will be able to hierarchically encode only 3 resolutions. WxH - ctu 16 2Wx2H - ctu 32 4Wx4H - ctu 64 diff -r 0b781d592c8e -r cf543136cbd0 source/encoder/encoder.cpp --- a/source/encoder/encoder.cppMon Mar 05 11:24:22 2018 +0530 +++ b/source/encoder/encoder.cppWed Feb 28 11:09:18 2018 +0530 @@ -3334,10 +3334,34 @@ int scaledNumPartition = analysis->numPartitions; int factor = 1 << m_param->scaleFactor; +int numPartitions = analysis->numPartitions; +int numCUsInFrame = analysis->numCUsInFrame; +cuLocation cuLoc; +cuLoc.heightInCU = (m_param->sourceHeight + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; +cuLoc.widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; + if (m_param->scaleFactor) -analysis->numPartitions *= factor; +{ +/* Allocate memory for scaled resoultion's numPartitions and numCUsInFrame*/ +analysis->numPartitions = m_param->num4x4Partitions; +analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU; + +/* Set skipWidth/skipHeight flags when the out of bound pixels in lowRes is greater than half of maxCUSize */ +int extendedWidth = ((m_param->sourceWidth / 2 + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize; +int extendedHeight = ((m_param->sourceHeight / 2 + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize; +uint32_t outOfBoundaryLowres = extendedWidth - m_param->sourceWidth / 2; +if (outOfBoundaryLowres * 2 >= m_param->maxCUSize) +cuLoc.skipWidth = true; +uint32_t outOfBoundaryLowresH = extendedHeight - m_param->sourceHeight / 2; +if 
(outOfBoundaryLowresH * 2 >= m_param->maxCUSize) +cuLoc.skipHeight = true; +} + /* Memory is allocated for inter and intra analysis data based on the slicetype */ allocAnalysis(analysis); + +analysis->numPartitions = numPartitions * factor; +analysis->numCUsInFrame = numCUsInFrame; if (m_param->bDisableLookahead && m_rateControl->m_isVbv) { X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.intraVbvCost); @@ -3345,6 +3369,11 @@ X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv); X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.intraSatdForVbv); } + +cuLoc.evenRowIndex = 0; +cuLoc.oddRowIndex = m_param->num4x4Partitions * cuLoc.widthInCU; +cuLoc.switchCondition = 0; // To switch between odd and even rows + if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) { if (m_param->analysisReuseLevel < 2) @@ -3365,17 +3394,30 @@ for (uint32_t d = 0; d < depthBytes; d++) { int bytes = analysis->numPartitions >> (depthBuf[d] * 2); +int numCTUCopied = 1; + if (m_param->scaleFactor) { -if (depthBuf[d] == 0) -depthBuf[d] = 1; +if (!depthBuf[d]) //copy data of one 64x64 to four scaled 64x64 CTUs. 
+{ +bytes /= 4; +numCTUCopied = 4; +} + if (partSizes[d] == SIZE_NxN) partSizes[d] = SIZE_2Nx2N; +if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) || (depthBuf[d] && m_param->maxCUSize != 64)) +depthBuf[d]--; } -memset(&((analysis_intra_data *)analysis->intraData)->depth[count], depthBuf[d], bytes); -memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count], modeBuf[d], bytes); -memset(&((analysis_intra_data *)analysis->intraData)->partSizes[count], partSizes[d], bytes); -count += bytes; +for (int numCTU = 0; numCTU < numCTUCopied; numCTU++) +{ +memset(&((analysis_intra_data *)analysis->intraData)->depth[count], depthBuf[d], bytes); +memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count], modeBuf[d], bytes); +memset(&((analysis_intra_data *)analysis->intraData)->partSizes[count], partSizes[d], bytes); +count += bytes; +if (m_param->scaleFactor) +
[x265] [PATCH] remove maxCTU size restriction in scaled save/load encodes
# HG changeset patch # User Bhavna Hariharan # Date 1519796358 -19800 # Wed Feb 28 11:09:18 2018 +0530 # Node ID ce647bfa20e203ed1aeb8f944326ac15cb74 # Parent 0b781d592c8e6e0917dc5f152129bebb201e529d remove maxCTU size restriction in scaled save/load encodes The scaled save/load feature requires that the save encode has a maximum CTU size of 32. The 32x32 blocks are mapped to a 64x64 block in load encode. Due to this restriction we will be able to hierarchically encode only 3 resolutions. WxH - ctu 16 2Wx2H - ctu 32 4Wx4H - ctu 64 diff -r 0b781d592c8e -r ce647bfa20e2 source/encoder/encoder.cpp --- a/source/encoder/encoder.cppMon Mar 05 11:24:22 2018 +0530 +++ b/source/encoder/encoder.cppWed Feb 28 11:09:18 2018 +0530 @@ -3334,10 +3334,33 @@ int scaledNumPartition = analysis->numPartitions; int factor = 1 << m_param->scaleFactor; +int numPartitions = analysis->numPartitions; +int numCUsInFrame = analysis->numCUsInFrame; +cuLocation cuLoc; +cuLoc.init(m_param); + if (m_param->scaleFactor) -analysis->numPartitions *= factor; +{ +/* Allocate memory for scaled resoultion's numPartitions and numCUsInFrame*/ +analysis->numPartitions = m_param->num4x4Partitions; +analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU; + +/* Set skipWidth/skipHeight flags when the out of bound pixels in lowRes is greater than half of maxCUSize */ +int extendedWidth = ((m_param->sourceWidth / 2 + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize; +int extendedHeight = ((m_param->sourceHeight / 2 + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize; +uint32_t outOfBoundaryLowres = extendedWidth - m_param->sourceWidth / 2; +if (outOfBoundaryLowres * 2 >= m_param->maxCUSize) +cuLoc.skipWidth = true; +uint32_t outOfBoundaryLowresH = extendedHeight - m_param->sourceHeight / 2; +if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize) +cuLoc.skipHeight = true; +} + /* Memory is allocated for inter and intra analysis data based on the slicetype */ 
allocAnalysis(analysis); + +analysis->numPartitions = numPartitions * factor; +analysis->numCUsInFrame = numCUsInFrame; if (m_param->bDisableLookahead && m_rateControl->m_isVbv) { X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead.intraVbvCost); @@ -3345,6 +3368,7 @@ X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv); X265_FREAD(analysis->lookahead.intraSatdForVbv, sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.intraSatdForVbv); } + if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == X265_TYPE_I) { if (m_param->analysisReuseLevel < 2) @@ -3361,21 +3385,34 @@ X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->chromaModes); X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, m_analysisFileIn, intraPic->partSizes); -size_t count = 0; +uint32_t count = 0; for (uint32_t d = 0; d < depthBytes; d++) { int bytes = analysis->numPartitions >> (depthBuf[d] * 2); +int numCTUCopied = 1; + if (m_param->scaleFactor) { -if (depthBuf[d] == 0) -depthBuf[d] = 1; +if (!depthBuf[d]) //copy data of one 64x64 to four scaled 64x64 CTUs. 
+{ +bytes /= 4; +numCTUCopied = 4; +} + if (partSizes[d] == SIZE_NxN) partSizes[d] = SIZE_2Nx2N; +if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) || (depthBuf[d] && m_param->maxCUSize != 64)) +depthBuf[d]--; } -memset(&((analysis_intra_data *)analysis->intraData)->depth[count], depthBuf[d], bytes); -memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count], modeBuf[d], bytes); -memset(&((analysis_intra_data *)analysis->intraData)->partSizes[count], partSizes[d], bytes); -count += bytes; +for (int numCTU = 0; numCTU < numCTUCopied; numCTU++) +{ +memset(&((analysis_intra_data *)analysis->intraData)->depth[count], depthBuf[d], bytes); +memset(&((analysis_intra_data *)analysis->intraData)->chromaModes[count], modeBuf[d], bytes); +memset(&((analysis_intra_data *)analysis->intraData)->partSizes[count], partSizes[d], bytes); +count += bytes; +if (m_param->scaleFactor) +d += getCUIndex(&cuLoc, &count, bytes, 1); +} } if (!m_
Re: [x265] [PATCH] remove maxCTU size restriction in scaled save/load encodes
On Mon, Mar 5, 2018 at 2:19 PM, wrote: > # HG changeset patch > # User Bhavna Hariharan > # Date 1519796358 -19800 > # Wed Feb 28 11:09:18 2018 +0530 > # Node ID ce647bfa20e203ed1aeb8f944326ac15cb74 > # Parent 0b781d592c8e6e0917dc5f152129bebb201e529d > remove maxCTU size restriction in scaled save/load encodes > > The scaled save/load feature requires that the save encode has a maximum > CTU > size of 32. The 32x32 blocks are mapped to a 64x64 block in load encode. > Due to > this restriction we will be able to heirarchialy encode only 3 resolutions. > WxH - ctu 16 > 2Wx2H - ctu 32 > 4Wx4H - ctu 64 > > diff -r 0b781d592c8e -r ce647bfa20e2 source/encoder/encoder.cpp > --- a/source/encoder/encoder.cppMon Mar 05 11:24:22 2018 +0530 > +++ b/source/encoder/encoder.cppWed Feb 28 11:09:18 2018 +0530 > @@ -3334,10 +3334,33 @@ > int scaledNumPartition = analysis->numPartitions; > int factor = 1 << m_param->scaleFactor; > > +int numPartitions = analysis->numPartitions; > +int numCUsInFrame = analysis->numCUsInFrame; > +cuLocation cuLoc; > +cuLoc.init(m_param); > + > if (m_param->scaleFactor) > -analysis->numPartitions *= factor; > +{ > +/* Allocate memory for scaled resoultion's numPartitions and > numCUsInFrame*/ > +analysis->numPartitions = m_param->num4x4Partitions; > +analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU; > + > +/* Set skipWidth/skipHeight flags when the out of bound pixels in > lowRes is greater than half of maxCUSize */ > +int extendedWidth = ((m_param->sourceWidth / 2 + > m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize; > +int extendedHeight = ((m_param->sourceHeight / 2 + > m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize; > +uint32_t outOfBoundaryLowres = extendedWidth - > m_param->sourceWidth / 2; > +if (outOfBoundaryLowres * 2 >= m_param->maxCUSize) > +cuLoc.skipWidth = true; > +uint32_t outOfBoundaryLowresH = extendedHeight - > m_param->sourceHeight / 2; > +if (outOfBoundaryLowresH * 2 >= 
m_param->maxCUSize) > +cuLoc.skipHeight = true; > +} > + > /* Memory is allocated for inter and intra analysis data based on the > slicetype */ > allocAnalysis(analysis); > + > +analysis->numPartitions = numPartitions * factor; > +analysis->numCUsInFrame = numCUsInFrame; > if (m_param->bDisableLookahead && m_rateControl->m_isVbv) > { > X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), > analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead. > intraVbvCost); > @@ -3345,6 +3368,7 @@ > X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), > analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv); > X265_FREAD(analysis->lookahead.intraSatdForVbv, > sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, > picData->lookahead.intraSatdForVbv); > } > + > if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == > X265_TYPE_I) > { > if (m_param->analysisReuseLevel < 2) > @@ -3361,21 +3385,34 @@ > X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, > m_analysisFileIn, intraPic->chromaModes); > X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, > m_analysisFileIn, intraPic->partSizes); > > -size_t count = 0; > +uint32_t count = 0; > for (uint32_t d = 0; d < depthBytes; d++) > { > int bytes = analysis->numPartitions >> (depthBuf[d] * 2); > +int numCTUCopied = 1; > + > if (m_param->scaleFactor) > { > -if (depthBuf[d] == 0) > -depthBuf[d] = 1; > +if (!depthBuf[d]) //copy data of one 64x64 to four scaled > 64x64 CTUs. 
> +{ > +bytes /= 4; > +numCTUCopied = 4; > +} > + > if (partSizes[d] == SIZE_NxN) > partSizes[d] = SIZE_2Nx2N; > +if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) || > (depthBuf[d] && m_param->maxCUSize != 64)) > +depthBuf[d]--; > } > -memset(&((analysis_intra_data > *)analysis->intraData)->depth[count], > depthBuf[d], bytes); > -memset(&((analysis_intra_data > *)analysis->intraData)->chromaModes[count], > modeBuf[d], bytes); > -memset(&((analysis_intra_data > *)analysis->intraData)->partSizes[count], > partSizes[d], bytes); > -count += bytes; > +for (int numCTU = 0; numCTU < numCTUCopied; numCTU++) > +{ > +memset(&((analysis_intra_data > *)analysis->intraData)->depth[count], depthBuf[d], bytes); > +memset(&((analysis_intra_data > *)analysis->intraData)->chromaModes[count], > modeBuf[d], bytes); > +memset(&((a