On Mon, Mar 5, 2018 at 2:19 PM, <bha...@multicorewareinc.com> wrote: > # HG changeset patch > # User Bhavna Hariharan <bha...@multicorewareinc.com> > # Date 1519796358 -19800 > # Wed Feb 28 11:09:18 2018 +0530 > # Node ID ce647bfa20e203ed1aeb8f5555944326ac15cb74 > # Parent 0b781d592c8e6e0917dc5f152129bebb201e529d > remove maxCTU size restriction in scaled save/load encodes > > The scaled save/load feature requires that the save encode has a maximum > CTU > size of 32. The 32x32 blocks are mapped to a 64x64 block in load encode. > Due to > this restriction we will be able to hierarchically encode only 3 resolutions. > WxH - ctu 16 > 2Wx2H - ctu 32 > 4Wx4H - ctu 64 > > diff -r 0b781d592c8e -r ce647bfa20e2 source/encoder/encoder.cpp > --- a/source/encoder/encoder.cpp Mon Mar 05 11:24:22 2018 +0530 > +++ b/source/encoder/encoder.cpp Wed Feb 28 11:09:18 2018 +0530 > @@ -3334,10 +3334,33 @@ > int scaledNumPartition = analysis->numPartitions; > int factor = 1 << m_param->scaleFactor; > > + int numPartitions = analysis->numPartitions; > + int numCUsInFrame = analysis->numCUsInFrame; > + cuLocation cuLoc; > + cuLoc.init(m_param); > + > if (m_param->scaleFactor) > - analysis->numPartitions *= factor; > + { > + /* Allocate memory for scaled resolution's numPartitions and > numCUsInFrame*/ > + analysis->numPartitions = m_param->num4x4Partitions; > + analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU; > + > + /* Set skipWidth/skipHeight flags when the out of bound pixels in > lowRes is greater than half of maxCUSize */ > + int extendedWidth = ((m_param->sourceWidth / 2 + > m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize; > + int extendedHeight = ((m_param->sourceHeight / 2 + > m_param->maxCUSize - 1) >> m_param->maxLog2CUSize) * m_param->maxCUSize; > + uint32_t outOfBoundaryLowres = extendedWidth - > m_param->sourceWidth / 2; > + if (outOfBoundaryLowres * 2 >= m_param->maxCUSize) > + cuLoc.skipWidth = true; > + uint32_t outOfBoundaryLowresH = 
extendedHeight - > m_param->sourceHeight / 2; > + if (outOfBoundaryLowresH * 2 >= m_param->maxCUSize) > + cuLoc.skipHeight = true; > + } > + > /* Memory is allocated for inter and intra analysis data based on the > slicetype */ > allocAnalysis(analysis); > + > + analysis->numPartitions = numPartitions * factor; > + analysis->numCUsInFrame = numCUsInFrame; > if (m_param->bDisableLookahead && m_rateControl->m_isVbv) > { > X265_FREAD(analysis->lookahead.intraVbvCost, sizeof(uint32_t), > analysis->numCUsInFrame, m_analysisFileIn, picData->lookahead. > intraVbvCost); > @@ -3345,6 +3368,7 @@ > X265_FREAD(analysis->lookahead.satdForVbv, sizeof(uint32_t), > analysis->numCuInHeight, m_analysisFileIn, picData->lookahead.satdForVbv); > X265_FREAD(analysis->lookahead.intraSatdForVbv, > sizeof(uint32_t), analysis->numCuInHeight, m_analysisFileIn, > picData->lookahead.intraSatdForVbv); > } > + > if (analysis->sliceType == X265_TYPE_IDR || analysis->sliceType == > X265_TYPE_I) > { > if (m_param->analysisReuseLevel < 2) > @@ -3361,21 +3385,34 @@ > X265_FREAD(modeBuf, sizeof(uint8_t), depthBytes, > m_analysisFileIn, intraPic->chromaModes); > X265_FREAD(partSizes, sizeof(uint8_t), depthBytes, > m_analysisFileIn, intraPic->partSizes); > > - size_t count = 0; > + uint32_t count = 0; > for (uint32_t d = 0; d < depthBytes; d++) > { > int bytes = analysis->numPartitions >> (depthBuf[d] * 2); > + int numCTUCopied = 1; > + > if (m_param->scaleFactor) > { > - if (depthBuf[d] == 0) > - depthBuf[d] = 1; > + if (!depthBuf[d]) //copy data of one 64x64 to four scaled > 64x64 CTUs. 
> + { > + bytes /= 4; > + numCTUCopied = 4; > + } > + > if (partSizes[d] == SIZE_NxN) > partSizes[d] = SIZE_2Nx2N; > + if ((depthBuf[d] > 1 && m_param->maxCUSize == 64) || > (depthBuf[d] && m_param->maxCUSize != 64)) > + depthBuf[d]--; > } > - memset(&((analysis_intra_data > *)analysis->intraData)->depth[count], > depthBuf[d], bytes); > - memset(&((analysis_intra_data > *)analysis->intraData)->chromaModes[count], > modeBuf[d], bytes); > - memset(&((analysis_intra_data > *)analysis->intraData)->partSizes[count], > partSizes[d], bytes); > - count += bytes; > + for (int numCTU = 0; numCTU < numCTUCopied; numCTU++) > + { > + memset(&((analysis_intra_data > *)analysis->intraData)->depth[count], depthBuf[d], bytes); > + memset(&((analysis_intra_data > *)analysis->intraData)->chromaModes[count], > modeBuf[d], bytes); > + memset(&((analysis_intra_data > *)analysis->intraData)->partSizes[count], > partSizes[d], bytes); > + count += bytes; > + if (m_param->scaleFactor) > + d += getCUIndex(&cuLoc, &count, bytes, 1); > + } > } > > if (!m_param->scaleFactor) > @@ -3384,10 +3421,18 @@ > } > else > { > + cuLoc.evenRowIndex = 0; > + cuLoc.oddRowIndex = m_param->num4x4Partitions * > cuLoc.widthInCU; > + cuLoc.switchCondition = 0; > uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, > analysis->numCUsInFrame * scaledNumPartition); > X265_FREAD(tempLumaBuf, sizeof(uint8_t), > analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, > intraPic->modes); > - for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < > analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor) > + uint32_t cnt = 0; > + for (uint32_t ctu32Idx = 0; ctu32Idx < > analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++) > + { > memset(&((analysis_intra_data > *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor); > + cnt += factor; > + ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0); > + } > X265_FREE(tempLumaBuf); > } > X265_FREE(tempBuf); > @@ -3451,44 +3496,94 @@ > } > } > > - size_t count 
= 0; > + uint32_t count = 0; > + cuLoc.switchCondition = 0; > for (uint32_t d = 0; d < depthBytes; d++) > { > int bytes = analysis->numPartitions >> (depthBuf[d] * 2); > - if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA && > depthBuf[d] == 0) > - depthBuf[d] = 1; > - memset(&((analysis_inter_data > *)analysis->interData)->depth[count], > depthBuf[d], bytes); > - memset(&((analysis_inter_data > *)analysis->interData)->modes[count], > modeBuf[d], bytes); > - if (m_param->analysisReuseLevel > 4) > + bool isScaledMaxCUSize = false; > + int numCTUCopied = 1; > + int writeDepth = depthBuf[d]; > + if (m_param->scaleFactor) > { > - if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA && > partSize[d] == SIZE_NxN) > - partSize[d] = SIZE_2Nx2N; > - memset(&((analysis_inter_data > *)analysis->interData)->partSize[count], > partSize[d], bytes); > - int numPU = (modeBuf[d] == MODE_INTRA) ? 1 : > nbPartsTable[(int)partSize[d]]; > - for (int pu = 0; pu < numPU; pu++) > + if (!depthBuf[d]) //copy data of one 64x64 to four scaled > 64x64 CTUs. 
> { > - if (pu) d++; > - ((analysis_inter_data > *)analysis->interData)->mergeFlag[count > + pu] = mergeFlag[d]; > - if (m_param->analysisReuseLevel == 10) > + isScaledMaxCUSize = true; > + bytes /= 4; > + numCTUCopied = 4; > + } > + if ((modeBuf[d] != MODE_INTRA && depthBuf[d] != 0) || > (modeBuf[d] == MODE_INTRA && depthBuf[d] > 1)) > + writeDepth--; > + } > + > + for (int numCTU = 0; numCTU < numCTUCopied; numCTU++) > + { > + memset(&((analysis_inter_data > *)analysis->interData)->depth[count], writeDepth, bytes); > + memset(&((analysis_inter_data > *)analysis->interData)->modes[count], modeBuf[d], bytes); > + if (m_param->analysisReuseLevel == 10 && bIntraInInter) > + memset(&((analysis_intra_data > *)analysis->intraData)->chromaModes[count], > chromaDir[d], bytes); > + > + if (m_param->analysisReuseLevel > 4) > + { > + puOrientation puOrient; > + puOrient.init(); > + if (m_param->scaleFactor && modeBuf[d] == MODE_INTRA > && partSize[d] == SIZE_NxN) > + partSize[d] = SIZE_2Nx2N; > + int partitionSize = partSize[d]; > + if (isScaledMaxCUSize && partSize[d] != SIZE_2Nx2N) > + partitionSize = getPuShape(&puOrient, > partSize[d], numCTU); > + memset(&((analysis_inter_data > *)analysis->interData)->partSize[count], > partitionSize, bytes); > + int numPU = (modeBuf[d] == MODE_INTRA) ? 
1 : > nbPartsTable[(int)partSize[d]]; > + for (int pu = 0; pu < numPU; pu++) > { > - ((analysis_inter_data > *)analysis->interData)->interDir[count > + pu] = interDir[d]; > - for (uint32_t i = 0; i < numDir; i++) > + if (!isScaledMaxCUSize && pu) > + d++; > + int restoreD = d; > + /* Adjust d value when the current CTU takes data > from 2nd PU */ > + if (puOrient.isRect || (puOrient.isAmp && > partitionSize == SIZE_2Nx2N)) > { > - ((analysis_inter_data > *)analysis->interData)->mvpIdx[i][count > + pu] = mvpIdx[i][d]; > - ((analysis_inter_data > *)analysis->interData)->refIdx[i][count > + pu] = refIdx[i][d]; > - if (m_param->scaleFactor) > + if ((numCTU > 1 && !puOrient.isVert) || > ((numCTU % 2 == 1) && puOrient.isVert)) > + d++; > + } > + if (puOrient.isAmp && pu) > + d++; > + > + ((analysis_inter_data > *)analysis->interData)->mergeFlag[count > + pu] = mergeFlag[d]; > + if (m_param->analysisReuseLevel == 10) > + { > + ((analysis_inter_data > *)analysis->interData)->interDir[count > + pu] = interDir[d]; > + MV mvCopy[2]; > + for (uint32_t i = 0; i < numDir; i++) > { > - mv[i][d].x *= > (int16_t)m_param->scaleFactor; > - mv[i][d].y *= > (int16_t)m_param->scaleFactor; > + ((analysis_inter_data > *)analysis->interData)->mvpIdx[i][count + pu] = mvpIdx[i][d]; > + ((analysis_inter_data > *)analysis->interData)->refIdx[i][count + pu] = refIdx[i][d]; > + mvCopy[i].x = mv[i][d].x; > + mvCopy[i].y = mv[i][d].y; > + if (m_param->scaleFactor) > + { > + mvCopy[i].x = mv[i][d].x * > (int16_t)m_param->scaleFactor; > + mvCopy[i].y = mv[i][d].y * > (int16_t)m_param->scaleFactor; > + } > + memcpy(&((analysis_inter_data > *)analysis->interData)->mv[i][count + pu], &mvCopy[i], sizeof(MV)); > } > - memcpy(&((analysis_inter_data > *)analysis->interData)->mv[i][count + pu], &mv[i][d], sizeof(MV)); > + } > + d = restoreD; // Restore d value after copying > each of the 4 64x64 CTUs > + > + if (isScaledMaxCUSize && (puOrient.isRect || > puOrient.isAmp)) > + { > + /* Skip PU index when 
current CTU is a 2Nx2N > */ > + if (partitionSize == SIZE_2Nx2N) > + pu++; > + /* Adjust d after completion of all 4 CTU > copies */ > + if (numCTU == 3 && (pu == (numPU - 1))) > + d++; > } > } > } > - if (m_param->analysisReuseLevel == 10 && bIntraInInter) > - memset(&((analysis_intra_data > *)analysis->intraData)->chromaModes[count], > chromaDir[d], bytes); > + count += bytes; > + if (m_param->scaleFactor) > + d += getCUIndex(&cuLoc, &count, bytes, 1); > } > - count += bytes; > } > > X265_FREE(tempBuf); > @@ -3509,10 +3604,18 @@ > } > else > { > + cuLoc.evenRowIndex = 0; > + cuLoc.oddRowIndex = m_param->num4x4Partitions * > cuLoc.widthInCU; > + cuLoc.switchCondition = 0; > uint8_t *tempLumaBuf = X265_MALLOC(uint8_t, > analysis->numCUsInFrame * scaledNumPartition); > X265_FREAD(tempLumaBuf, sizeof(uint8_t), > analysis->numCUsInFrame * scaledNumPartition, m_analysisFileIn, > intraPic->modes); > - for (uint32_t ctu32Idx = 0, cnt = 0; ctu32Idx < > analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++, cnt += factor) > + uint32_t cnt = 0; > + for (uint32_t ctu32Idx = 0; ctu32Idx < > analysis->numCUsInFrame * scaledNumPartition; ctu32Idx++) > + { > memset(&((analysis_intra_data > *)analysis->intraData)->modes[cnt], tempLumaBuf[ctu32Idx], factor); > + cnt += factor; > + ctu32Idx += getCUIndex(&cuLoc, &cnt, factor, 0); > + } > X265_FREE(tempLumaBuf); > } > } > @@ -3524,9 +3627,123 @@ > if (numDir == 1) > totalConsumedBytes = consumedBytes; > } > + > + /* Restore to the current encode's numPartitions and numCUsInFrame */ > + if (m_param->scaleFactor) > + { > + analysis->numPartitions = m_param->num4x4Partitions; > + analysis->numCUsInFrame = cuLoc.heightInCU * cuLoc.widthInCU; > + } > #undef X265_FREAD > } > > +/* Toggle between two consecutive CTU rows. 
The save's CTU is copied > +twice consecutively in the first and second CTU row of load*/ > + > +int Encoder::getCUIndex(cuLocation* cuLoc, uint32_t* count, int bytes, > int flag) > +{ > + int index = 0; > + cuLoc->switchCondition += bytes; > + int isBoundaryW = (*count % (m_param->num4x4Partitions * > cuLoc->widthInCU) == 0); > + > + /* Width boundary case : > + Skip to appropriate index when out of boundary cases occur > + Out of boundary may occur when the out of bound pixels along > + the width in low resolution is greater than half of the maxCUSize */ > + if (cuLoc->skipWidth && isBoundaryW) > + { > + if (flag) > + index++; > + else > + { > + /* Number of 4x4 blocks in out of bound region */ > + int outOfBound = m_param->maxCUSize / 2; > + uint32_t sum = (uint32_t)pow((outOfBound >> 2), 2); > + index += sum; > + } > + cuLoc->switchCondition += m_param->num4x4Partitions; > + } > + > + /* Completed writing 2 CTUs - move to the last remembered index of > the next CTU row*/ > + if (cuLoc->switchCondition == 2 * m_param->num4x4Partitions) > + { > + if (isBoundaryW) > + cuLoc->evenRowIndex = *count + (m_param->num4x4Partitions * > cuLoc->widthInCU); // end of row - skip to the next even row > + else > + cuLoc->evenRowIndex = *count; > + *count = cuLoc->oddRowIndex; > + > + /* Height boundary case : > + Skip to appropriate index when out of boundary cases occur > + Out of boundary may occur when the out of bound pixels along > + the height in low resolution is greater than half of the > maxCUSize */ > + int isBoundaryH = (*count >= (m_param->num4x4Partitions * > cuLoc->heightInCU * cuLoc->widthInCU)); > + if (cuLoc->skipHeight && isBoundaryH) > + { > + if (flag) > + index += 2; > + else > + { > + int outOfBound = m_param->maxCUSize / 2; > + uint32_t sum = (uint32_t)(2 * pow((abs(outOfBound) >> 2), > 2)); > + index += sum; > + } > + *count = cuLoc->evenRowIndex; > + cuLoc->switchCondition = 0; > + } > + } > + /* Completed writing 4 CTUs - move to the last remembered 
index of > + the previous CTU row to copy the next save CTU's data*/ > + else if (cuLoc->switchCondition == 4 * m_param->num4x4Partitions) > + { > + if (isBoundaryW) > + cuLoc->oddRowIndex = *count + (m_param->num4x4Partitions * > cuLoc->widthInCU); // end of row - skip to the next odd row > + else > + cuLoc->oddRowIndex = *count; > + *count = cuLoc->evenRowIndex; > + cuLoc->switchCondition = 0; > + } > + return index; > +} > + > +/* save load > + CTU0 CTU1 CTU2 CTU3 > + 2NxN 2Nx2N 2Nx2N 2Nx2N 2Nx2N > + NX2N 2Nx2N 2Nx2N 2Nx2N 2Nx2N > + 2NxnU 2NxN 2NxN 2Nx2N 2Nx2N > + 2NxnD 2Nx2N 2Nx2N 2NxN 2NxN > + nLx2N Nx2N 2Nx2N Nx2N 2Nx2N > + nRx2N 2Nx2N Nx2N 2Nx2N Nx2N > +*/ > +int Encoder::getPuShape(puOrientation* puOrient, int partSize, int > numCTU) > +{ > + puOrient->isRect = true; > + if (partSize == SIZE_Nx2N) > + puOrient->isVert = true; > + if (partSize >= SIZE_2NxnU) // All AMP modes > + { > + puOrient->isAmp = true; > + puOrient->isRect = false; > + if (partSize == SIZE_2NxnD && numCTU > 1) > + return SIZE_2NxN; > + else if (partSize == SIZE_2NxnU && numCTU < 2) > + return SIZE_2NxN; > + else if (partSize == SIZE_nLx2N) > + { > + puOrient->isVert = true; > + if (!(numCTU % 2)) > + return SIZE_Nx2N; > + } > + else if (partSize == SIZE_nRx2N) > + { > + puOrient->isVert = true; > + if (numCTU % 2) > + return SIZE_Nx2N; > + } > + } > + return SIZE_2Nx2N; > +} > + > void Encoder::readAnalysis2PassFile(x265_analysis_2Pass* analysis2Pass, > int curPoc, int sliceType) > { > > diff -r 0b781d592c8e -r ce647bfa20e2 source/encoder/encoder.h > --- a/source/encoder/encoder.h Mon Mar 05 11:24:22 2018 +0530 > +++ b/source/encoder/encoder.h Wed Feb 28 11:09:18 2018 +0530 > @@ -90,6 +90,43 @@ > RPSListNode* prior; > }; > > +struct cuLocation > +{ > + bool skipWidth; > + bool skipHeight; > + uint32_t heightInCU; > + uint32_t widthInCU; > + uint32_t oddRowIndex; > + uint32_t evenRowIndex; > + uint32_t switchCondition; > + > + void init(x265_param* param) > + { > + skipHeight = false; > 
+ skipWidth = false; > + heightInCU = (param->sourceHeight + param->maxCUSize - 1) >> > param->maxLog2CUSize; > + widthInCU = (param->sourceWidth + param->maxCUSize - 1) >> > param->maxLog2CUSize; > + evenRowIndex = 0; > + oddRowIndex = param->num4x4Partitions * widthInCU; > + switchCondition = 0; // To switch between odd and even rows > + } > +}; > + > +struct puOrientation > +{ > + bool isVert; > + bool isRect; > + bool isAmp; > + > + void init() > + { > + isRect = false; > + isAmp = false; > + isVert = false; > + } > +}; > + > + > class FrameEncoder; > class DPB; > class Lookahead; > @@ -237,6 +274,10 @@ > > void readAnalysisFile(x265_analysis_data* analysis, int poc, const > x265_picture* picIn); > > + int getCUIndex(cuLocation* cuLoc, uint32_t* count, int bytes, int > flag); > + > + int getPuShape(puOrientation* puOrient, int partSize, int numCTU); > + > void writeAnalysisFile(x265_analysis_data* pic, FrameData > &curEncData); > void readAnalysis2PassFile(x265_analysis_2Pass* analysis2Pass, int > poc, int sliceType); > void writeAnalysis2PassFile(x265_analysis_2Pass* analysis2Pass, > FrameData &curEncData, int slicetype); > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > >
Pushed.
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel