On Fri, Mar 9, 2018 at 7:12 PM, <bha...@multicorewareinc.com> wrote:
> # HG changeset patch > # User Bhavna Hariharan <bha...@multicorewareinc.com> > # Date 1520595579 -19800 > # Fri Mar 09 17:09:39 2018 +0530 > # Node ID e5425bd33176d6366f34d93e80f9cb1c9c4ebe6f > # Parent d292dacb81d8607ce0b2fb106b7383b360863e9d > dynamic-refine: enable switching between inter refinement levels 0-3 > based on the content and the encoder properties. > > The algorithm has 2 parts > 1) Training - Encode frames with refine-inter 3 and calculate corresponding > feature values until saturation of values. The training restarts when a > scenecut > is encountered. > 2) Classification - Based on the prior probability calculated from the > training > data and the feature metric of the current CU an appropriate refine-inter > level > is chosen for the CU. > > diff -r d292dacb81d8 -r e5425bd33176 source/common/cudata.cpp > --- a/source/common/cudata.cpp Fri Mar 09 09:44:53 2018 +0530 > +++ b/source/common/cudata.cpp Fri Mar 09 17:09:39 2018 +0530 > @@ -317,6 +317,16 @@ > m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? > m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL; > m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - > 1))) ? 
m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL; > memset(m_distortion, 0, m_numPartitions * sizeof(sse_t)); > + > + if (m_encData->m_param->bDynamicRefine) > + { > + int size = m_encData->m_param->maxCUDepth * > X265_REFINE_INTER_LEVELS; > + CHECKED_MALLOC_ZERO(m_collectCURd, uint64_t, size); > + CHECKED_MALLOC_ZERO(m_collectCUVariance, uint32_t, size); > + CHECKED_MALLOC_ZERO(m_collectCUCount, uint32_t, size); > + } > +fail: > + return; > } > > // initialize Sub partition > diff -r d292dacb81d8 -r e5425bd33176 source/common/cudata.h > --- a/source/common/cudata.h Fri Mar 09 09:44:53 2018 +0530 > +++ b/source/common/cudata.h Fri Mar 09 17:09:39 2018 +0530 > @@ -224,6 +224,11 @@ > uint64_t m_fAc_den[3]; > uint64_t m_fDc_den[3]; > > + /* Feature values per CTU for dynamic refinement */ > + uint64_t* m_collectCURd; > + uint32_t* m_collectCUVariance; > + uint32_t* m_collectCUCount; > + > CUData(); > > void initialize(const CUDataMemPool& dataPool, uint32_t depth, > const x265_param& param, int instance); > diff -r d292dacb81d8 -r e5425bd33176 source/common/frame.cpp > --- a/source/common/frame.cpp Fri Mar 09 09:44:53 2018 +0530 > +++ b/source/common/frame.cpp Fri Mar 09 17:09:39 2018 +0530 > @@ -53,6 +53,7 @@ > m_addOnDepth = NULL; > m_addOnCtuInfo = NULL; > m_addOnPrevChange = NULL; > + m_classifyFrame = false; > } > > bool Frame::create(x265_param *param, float* quantOffsets) > @@ -85,6 +86,14 @@ > m_analysis2Pass.analysisFramedata = NULL; > } > > + if (param->bDynamicRefine) > + { > + int size = m_param->maxCUDepth * X265_REFINE_INTER_LEVELS; > + CHECKED_MALLOC_ZERO(m_classifyRd, uint64_t, size); > + CHECKED_MALLOC_ZERO(m_classifyVariance, uint64_t, size); > + CHECKED_MALLOC_ZERO(m_classifyCount, uint32_t, size); > + } > + > if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && > m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode || > !!param->bAQMotion, param->rc.qgSize)) > { > X265_CHECK((m_reconColCount == NULL), "m_reconColCount was > 
initialized"); > @@ -226,4 +235,11 @@ > } > m_lowres.destroy(); > X265_FREE(m_rcData); > + > + if (m_param->bDynamicRefine) > + { > + X265_FREE_ZERO(m_classifyRd); > + X265_FREE_ZERO(m_classifyVariance); > + X265_FREE_ZERO(m_classifyCount); > + } > } > diff -r d292dacb81d8 -r e5425bd33176 source/common/frame.h > --- a/source/common/frame.h Fri Mar 09 09:44:53 2018 +0530 > +++ b/source/common/frame.h Fri Mar 09 17:09:39 2018 +0530 > @@ -122,6 +122,14 @@ > uint8_t** m_addOnDepth; > uint8_t** m_addOnCtuInfo; > int** m_addOnPrevChange; > + > + /* Average feature values of frames being considered for > classification */ > + uint64_t* m_classifyRd; > + uint64_t* m_classifyVariance; > + uint32_t* m_classifyCount; > + > + bool m_classifyFrame; > + > Frame(); > > bool create(x265_param *param, float* quantOffsets); > diff -r d292dacb81d8 -r e5425bd33176 source/common/lowres.cpp > --- a/source/common/lowres.cpp Fri Mar 09 09:44:53 2018 +0530 > +++ b/source/common/lowres.cpp Fri Mar 09 17:09:39 2018 +0530 > @@ -59,10 +59,12 @@ > CHECKED_MALLOC_ZERO(qpAqMotionOffset, double, cuCountFullRes); > CHECKED_MALLOC_ZERO(invQscaleFactor, int, cuCountFullRes); > CHECKED_MALLOC_ZERO(qpCuTreeOffset, double, cuCountFullRes); > - CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes); > if (qgSize == 8) > CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount); > } > + > + if (origPic->m_param->bDynamicRefine) > + CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes); > CHECKED_MALLOC(propagateCost, uint16_t, cuCount); > > /* allocate lowres buffers */ > diff -r d292dacb81d8 -r e5425bd33176 source/encoder/analysis.cpp > --- a/source/encoder/analysis.cpp Fri Mar 09 09:44:53 2018 +0530 > +++ b/source/encoder/analysis.cpp Fri Mar 09 17:09:39 2018 +0530 > @@ -1184,7 +1184,7 @@ > > if (m_evaluateInter) > { > - if (m_param->interRefine == 2) > + if (m_refineLevel == 2) > { > if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP) > skipModes = true; > @@ -1307,7 +1307,7 @@ > 
md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); > checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], > md.pred[PRED_MERGE], cuGeom); > if (m_param->rdLevel) > - skipModes = (m_param->bEnableEarlySkip || > m_param->interRefine == 2) > + skipModes = (m_param->bEnableEarlySkip || m_refineLevel > == 2) > && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: > sa8d threshold per depth > } > if (md.bestMode && m_param->bEnableRecursionSkip && > !bCtuInfoCheck && !(m_param->bMVType && m_param->analysisReuseLevel == 7 && > (m_modeFlag[0] || m_modeFlag[1]))) > @@ -1874,7 +1874,7 @@ > > if (m_evaluateInter) > { > - if (m_param->interRefine == 2) > + if (m_refineLevel == 2) > { > if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP) > skipModes = true; > @@ -2004,7 +2004,7 @@ > md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); > md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); > checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP], > md.pred[PRED_MERGE], cuGeom); > - skipModes = (m_param->bEnableEarlySkip || > m_param->interRefine == 2) && > + skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2) > && > md.bestMode && !md.bestMode->cu.getQtRootCbf(0); > refMasks[0] = allSplitRefs; > md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); > @@ -2413,7 +2413,16 @@ > bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY); > bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth; > > - int split = (m_param->interRefine && cuGeom.log2CUSize == > (uint32_t)(g_log2Size[m_param->minCUSize] + 1) && bDecidedDepth); > + TrainingData td; > + td.init(parentCTU, cuGeom); > + > + if (!m_param->bDynamicRefine) > + m_refineLevel = m_param->interRefine; > + else > + m_refineLevel = m_frame->m_classifyFrame ? 
0 : 3; > + > + int split = (m_refineLevel && cuGeom.log2CUSize == > (uint32_t)(g_log2Size[m_param->minCUSize] + 1) && bDecidedDepth); > + td.split = split; > > if (bDecidedDepth) > { > @@ -2423,7 +2432,7 @@ > md.bestMode = &mode; > mode.cu.initSubCU(parentCTU, cuGeom, qp); > PartSize size = (PartSize)parentCTU.m_ > partSize[cuGeom.absPartIdx]; > - if (parentCTU.isIntra(cuGeom.absPartIdx) && m_param->interRefine > < 2) > + if (parentCTU.isIntra(cuGeom.absPartIdx) && m_refineLevel < 2) > { > if (m_param->intraRefine == 4) > compressIntraCU(parentCTU, cuGeom, qp); > @@ -2439,7 +2448,7 @@ > checkIntra(mode, cuGeom, size); > } > } > - else if (!parentCTU.isIntra(cuGeom.absPartIdx) && > m_param->interRefine < 2) > + else if (!parentCTU.isIntra(cuGeom.absPartIdx) && m_refineLevel > < 2) > { > mode.cu.copyFromPic(parentCTU, cuGeom, m_csp, false); > uint32_t numPU = parentCTU.getNumPartInter( > cuGeom.absPartIdx); > @@ -2501,7 +2510,7 @@ > } > motionCompensation(mode.cu, pu, mode.predYuv, true, > (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)); > } > - if (!m_param->interRefine && parentCTU.isSkipped(cuGeom. > absPartIdx)) > + if (!m_param->interRefine && !m_param->bDynamicRefine && > parentCTU.isSkipped(cuGeom.absPartIdx)) > encodeResAndCalcRdSkipCU(mode); > else > encodeResAndCalcRdInterCU(mode, cuGeom); > @@ -2512,7 +2521,7 @@ > checkDQP(mode, cuGeom); > } > > - if (m_param->interRefine < 2) > + if (m_refineLevel < 2) > { > if (m_bTryLossless) > tryLossless(cuGeom); > @@ -2540,7 +2549,10 @@ > } > } > > - if (m_param->interRefine > 1 || (m_param->interRefine && > parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP && > !mode.cu.isSkipped(0))) > + if (m_param->bDynamicRefine) > + classifyCU(parentCTU,cuGeom, *md.bestMode, td); > + > + if (m_refineLevel > 1 || (m_refineLevel && > parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP && > !mode.cu.isSkipped(0))) > { > m_evaluateInter = 1; > m_param->rdLevel > 4 ? 
compressInterCU_rd5_6(parentCTU, > cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp); > @@ -2599,7 +2611,7 @@ > else > updateModeCost(*splitPred); > > - if (m_param->interRefine) > + if (m_refineLevel) > { > if (m_param->rdLevel > 1) > checkBestMode(*splitPred, cuGeom.depth); > @@ -2613,6 +2625,83 @@ > md.bestMode->cu.copyToPic(depth); > md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic, > parentCTU.m_cuAddr, cuGeom.absPartIdx); > } > + if (m_param->bDynamicRefine && bDecidedDepth) > + trainCU(parentCTU, cuGeom, *md.bestMode, td); > +} > + > +void Analysis::classifyCU(const CUData& ctu, const CUGeom& cuGeom, const > Mode& bestMode, TrainingData& trainData) > +{ > + uint32_t depth = cuGeom.depth; > + trainData.cuVariance = calculateCUVariance(ctu, cuGeom); > + if (m_frame->m_classifyFrame) > + { > + uint64_t diffRefine[X265_REFINE_INTER_LEVELS]; > + uint64_t diffRefineRd[X265_REFINE_INTER_LEVELS]; > + float probRefine[X265_REFINE_INTER_LEVELS] = { 0 }; > + uint8_t varRefineLevel = 0; > + uint8_t rdRefineLevel = 0; > + uint64_t cuCost = bestMode.rdCost; > + > + int offset = (depth * X265_REFINE_INTER_LEVELS) + 1; > + if (cuCost < m_frame->m_classifyRd[offset]) > + m_refineLevel = 1; > + else > + { > + uint64_t trainingCount = 0; > + for (uint8_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) > + { > + offset = (depth * X265_REFINE_INTER_LEVELS) + i; > + trainingCount += m_frame->m_classifyCount[offset]; > + } > + for (uint8_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) > + { > + offset = (depth * X265_REFINE_INTER_LEVELS) + i; > + /* Calculate distance values */ > + diffRefine[i] = abs((int64_t)(trainData.cuVariance - > m_frame->m_classifyVariance[offset])); > + diffRefineRd[i] = abs((int64_t)(cuCost - > m_frame->m_classifyRd[offset])); > + > + /* Calculate prior probability - ranges between 0 and 1 */ > + if (trainingCount) > + probRefine[i] = ((float)m_frame->m_classifyCount[offset] > / (float)trainingCount); > + > + /* Bayesian classification - P(c|x)P(x) 
= P(x|c)P(c) > + P(c|x) is the posterior probability of class given > predictor. > + P(c) is the prior probability of class. > + P(x|c) is the likelihood which is the probability of > predictor given class. > + P(x) is the prior probability of predictor.*/ > + if ((diffRefine[i] * probRefine[m_refineLevel]) < > (diffRefine[m_refineLevel] * probRefine[i])) > + varRefineLevel = i; > + if ((diffRefineRd[i] * probRefine[m_refineLevel]) < > (diffRefineRd[m_refineLevel] * probRefine[i])) > + rdRefineLevel = i; > + } > + m_refineLevel = X265_MAX(varRefineLevel, rdRefineLevel); > + } > + } > +} > + > +void Analysis::trainCU(const CUData& ctu, const CUGeom& cuGeom, const > Mode& bestMode, TrainingData& trainData) > +{ > + uint32_t depth = cuGeom.depth; > + int classify = 0; > + if (!m_frame->m_classifyFrame) > + { > + if (trainData.predMode == ctu.m_predMode[cuGeom.absPartIdx] && > trainData.partSize == ctu.m_partSize[cuGeom.absPartIdx] > + && trainData.mergeFlag == ctu.m_mergeFlag[cuGeom.absPartIdx]) > + classify = 0; > + else if ((depth == m_param->maxCUDepth - 1) && trainData.split) > + classify = 1; > + else if (trainData.partSize == SIZE_2Nx2N && trainData.partSize > == ctu.m_partSize[cuGeom.absPartIdx]) > + classify = 2; > + else > + classify = 3; > + } > + else > + classify = m_refineLevel; > + uint64_t cuCost = bestMode.rdCost; > + int offset = (depth * X265_REFINE_INTER_LEVELS) + classify; > + ctu.m_collectCURd[offset] += cuCost; > + ctu.m_collectCUVariance[offset] += trainData.cuVariance; > + ctu.m_collectCUCount[offset]++; > } > > /* sets md.bestMode if a valid merge candidate is found, else leaves it > NULL */ > @@ -3414,6 +3503,33 @@ > return false; > } > > +uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom& > cuGeom) > +{ > + uint32_t cuVariance = 0; > + uint32_t *blockVariance = m_frame->m_lowres.blockVariance; > + int loopIncr = (m_param->rc.qgSize == 8) ? 
8 : 16; > + > + uint32_t width = m_frame->m_fencPic->m_picWidth; > + uint32_t height = m_frame->m_fencPic->m_picHeight; > + uint32_t block_x = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx]; > + uint32_t block_y = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx]; > + uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) > / loopIncr; > + uint32_t blockSize = m_param->maxCUSize >> cuGeom.depth; > + uint32_t cnt = 0; > + > + for (uint32_t block_yy = block_y; block_yy < block_y + blockSize && > block_yy < height; block_yy += loopIncr) > + { > + for (uint32_t block_xx = block_x; block_xx < block_x + blockSize > && block_xx < width; block_xx += loopIncr) > + { > + uint32_t idx = ((block_yy / loopIncr) * (maxCols)) + > (block_xx / loopIncr); > + cuVariance += blockVariance[idx]; > + cnt++; > + } > + } > + > + return cuVariance / cnt; > +} > + > int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom& > cuGeom, int32_t complexCheck, double baseQp) > { > FrameData& curEncData = *m_frame->m_encData; > diff -r d292dacb81d8 -r e5425bd33176 source/encoder/analysis.h > --- a/source/encoder/analysis.h Fri Mar 09 09:44:53 2018 +0530 > +++ b/source/encoder/analysis.h Fri Mar 09 17:09:39 2018 +0530 > @@ -142,8 +142,29 @@ > uint8_t* m_multipassModes; > > uint8_t m_evaluateInter; > + int32_t m_refineLevel; > + > uint8_t* m_additionalCtuInfo; > int* m_prevCtuInfoChange; > + > + struct TrainingData > + { > + uint32_t cuVariance; > + uint8_t predMode; > + uint8_t partSize; > + uint8_t mergeFlag; > + int split; > + > + void init(const CUData& parentCTU, const CUGeom& cuGeom) > + { > + cuVariance = 0; > + predMode = parentCTU.m_predMode[cuGeom.absPartIdx]; > + partSize = parentCTU.m_partSize[cuGeom.absPartIdx]; > + mergeFlag = parentCTU.m_mergeFlag[cuGeom.absPartIdx]; > + split = 0; > + } > + }; > + > /* refine RD based on QP for rd-levels 5 and 6 */ > void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom, > int32_t qp, int32_t lqp); > > @@ -182,6 +203,10 
@@ > void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom); > > int calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom, > int32_t complexCheck = 0, double baseQP = -1); > + uint32_t calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom); > + > + void classifyCU(const CUData& ctu, const CUGeom& cuGeom, const Mode& > bestMode, TrainingData& trainData); > + void trainCU(const CUData& ctu, const CUGeom& cuGeom, const Mode& > bestMode, TrainingData& trainData); > > void calculateNormFactor(CUData& ctu, int qp); > void normFactor(const pixel* src, uint32_t blockSize, CUData& ctu, > int qp, TextType ttype); > diff -r d292dacb81d8 -r e5425bd33176 source/encoder/encoder.cpp > --- a/source/encoder/encoder.cpp Fri Mar 09 09:44:53 2018 +0530 > +++ b/source/encoder/encoder.cpp Fri Mar 09 17:09:39 2018 +0530 > @@ -96,6 +96,7 @@ > #endif > > m_prevTonemapPayload.payload = NULL; > + m_startPoint = 0; > } > inline char *strcatFilename(const char *input, const char *suffix) > { > @@ -412,6 +413,17 @@ > if (m_bToneMap) > m_numCimInfo = m_hdr10plus_api->hdr10plus_ > json_to_movie_cim(m_param->toneMapFile, m_cim); > #endif > + > + if (m_param->bDynamicRefine) > + { > + int size = m_param->totalFrames * m_param->maxCUDepth * > X265_REFINE_INTER_LEVELS; > + CHECKED_MALLOC_ZERO(m_variance, uint64_t, size); > + CHECKED_MALLOC_ZERO(m_rdCost, uint64_t, size); > + CHECKED_MALLOC_ZERO(m_trainingCount, uint32_t, size); > + return; > + fail: > + m_aborted = true; > + } > } > > void Encoder::stopJobs() > @@ -697,7 +709,13 @@ > if (m_bToneMap) > m_hdr10plus_api->hdr10plus_clear_movie(m_cim, m_numCimInfo); > #endif > - > + > + if (m_param->bDynamicRefine) > + { > + X265_FREE(m_variance); > + X265_FREE(m_rdCost); > + X265_FREE(m_trainingCount); > + } > if (m_exportedPic) > { > ATOMIC_DEC(&m_exportedPic->m_countRefEncoders); > diff -r d292dacb81d8 -r e5425bd33176 source/encoder/encoder.h > --- a/source/encoder/encoder.h Fri Mar 09 09:44:53 2018 +0530 > +++ 
b/source/encoder/encoder.h Fri Mar 09 17:09:39 2018 +0530 > @@ -221,6 +221,13 @@ > > x265_sei_payload m_prevTonemapPayload; > > + /* Collect frame level feature data */ > + uint64_t* m_rdCost; > + uint64_t* m_variance; > + uint32_t* m_trainingCount; > + int32_t m_startPoint; > + Lock m_dynamicRefineLock; > + > Encoder(); > ~Encoder() > { > diff -r d292dacb81d8 -r e5425bd33176 source/encoder/frameencoder.cpp > --- a/source/encoder/frameencoder.cpp Fri Mar 09 09:44:53 2018 +0530 > +++ b/source/encoder/frameencoder.cpp Fri Mar 09 17:09:39 2018 +0530 > @@ -736,6 +736,9 @@ > m_top->m_rateControl->m_startEndOrder.incr(); // faked > rateControlEnd calls for negative frames > } > > + if (m_param->bDynamicRefine) > + computeAvgTrainingData(); > + > /* Analyze CTU rows, most of the hard work is done here. Frame is > * compressed in a wave-front pattern if WPP is enabled. Row based > loop > * filters runs behind the CTU compression and reconstruction */ > @@ -1457,6 +1460,30 @@ > // Does all the CU analysis, returns best top level mode decision > Mode& best = tld.analysis.compressCTU(*ctu, *m_frame, > m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder); > > + if (m_param->bDynamicRefine) > + { > + { > + ScopedLock dynLock(m_top->m_dynamicRefineLock); > + for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++) > + { > + for (uint32_t depth = 0; depth < m_param->maxCUDepth; > depth++) > + { > + int offset = (depth * X265_REFINE_INTER_LEVELS) + > i; > + int index = (m_frame->m_encodeOrder * > X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset; > + if (ctu->m_collectCUCount[offset]) > + { > + m_top->m_variance[index] += > ctu->m_collectCUVariance[offset]; > + m_top->m_rdCost[index] += > ctu->m_collectCURd[offset]; > + m_top->m_trainingCount[index] += > ctu->m_collectCUCount[offset]; > + } > + } > + } > + } > + X265_FREE_ZERO(ctu->m_collectCUVariance); > + X265_FREE_ZERO(ctu->m_collectCURd); > + X265_FREE_ZERO(ctu->m_collectCUCount); > + } > + > // take a sample of the current active 
worker count > ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount); > ATOMIC_INC(&m_activeWorkerCountSamples); > @@ -1839,6 +1866,58 @@ > m_completionEvent.trigger(); > } > > +void FrameEncoder::computeAvgTrainingData() > +{ > + if (m_frame->m_lowres.bScenecut) > + m_top->m_startPoint = m_frame->m_encodeOrder; > + > + if (m_frame->m_encodeOrder - m_top->m_startPoint < 2 * > m_param->frameNumThreads) > + m_frame->m_classifyFrame = false; > + else > + m_frame->m_classifyFrame = true; > + > + int size = m_param->maxCUDepth * X265_REFINE_INTER_LEVELS; > + memset(m_frame->m_classifyRd, 0, size * sizeof(uint64_t)); > + memset(m_frame->m_classifyVariance, 0, size * sizeof(uint64_t)); > + memset(m_frame->m_classifyCount, 0, size * sizeof(uint32_t)); > + > + if (m_frame->m_classifyFrame) > + { > + uint32_t limit = m_frame->m_encodeOrder - > m_param->frameNumThreads - 1; > + for (uint32_t i = m_top->m_startPoint + 1; i < limit; i++) > + { > + for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++) > + { > + for (uint32_t depth = 0; depth < m_param->maxCUDepth; > depth++) > + { > + int offset = (depth * X265_REFINE_INTER_LEVELS) + j; > + int index = (i* X265_REFINE_INTER_LEVELS * > m_param->maxCUDepth) + offset; > + if (m_top->m_trainingCount[index]) > + { > + m_frame->m_classifyRd[offset] += > m_top->m_rdCost[index] / m_top->m_trainingCount[index]; > + m_frame->m_classifyVariance[offset] += > m_top->m_variance[index] / m_top->m_trainingCount[index]; > + m_frame->m_classifyCount[offset] += > m_top->m_trainingCount[index]; > + } > + } > + } > + } > + /* Calculates the average feature values of historic frames that > are being considered for the current frame */ > + int historyCount = m_frame->m_encodeOrder - > m_param->frameNumThreads - m_top->m_startPoint - 1; > + if (historyCount) > + { > + for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++) > + { > + for (uint32_t depth = 0; depth < m_param->maxCUDepth; > depth++) > + { > + int offset = (depth * 
X265_REFINE_INTER_LEVELS) + j; > + m_frame->m_classifyRd[offset] /= historyCount; > + m_frame->m_classifyVariance[offset] /= historyCount; > + } > + } > + } > + } > +} > + > /* collect statistics about CU coding decisions, return total QP */ > int FrameEncoder::collectCTUStatistics(const CUData& ctu, FrameStats* > log) > { > diff -r d292dacb81d8 -r e5425bd33176 source/encoder/frameencoder.h > --- a/source/encoder/frameencoder.h Fri Mar 09 09:44:53 2018 +0530 > +++ b/source/encoder/frameencoder.h Fri Mar 09 17:09:39 2018 +0530 > @@ -230,6 +230,7 @@ > void threadMain(); > int collectCTUStatistics(const CUData& ctu, FrameStats* frameLog); > void noiseReductionUpdate(); > + void computeAvgTrainingData(); > > /* Called by WaveFront::findJob() */ > virtual void processRow(int row, int threadId); > diff -r d292dacb81d8 -r e5425bd33176 source/encoder/slicetype.cpp > --- a/source/encoder/slicetype.cpp Fri Mar 09 09:44:53 2018 +0530 > +++ b/source/encoder/slicetype.cpp Fri Mar 09 17:09:39 2018 +0530 > @@ -178,12 +178,12 @@ > } > } > > - /* Need variance data for weighted prediction */ > + /* Need variance data for weighted prediction and dynamic > refinement*/ > if (param->bEnableWeightedPred || param->bEnableWeightedBiPred) > - { > + { > for (blockY = 0; blockY < maxRow; blockY += loopIncr) > - for (blockX = 0; blockX < maxCol; blockX += loopIncr) > - acEnergyCu(curFrame, blockX, blockY, > param->internalCsp, param->rc.qgSize); > + for (blockX = 0; blockX < maxCol; blockX += loopIncr) > + acEnergyCu(curFrame, blockX, blockY, > param->internalCsp, param->rc.qgSize); > } > } > else > @@ -240,7 +240,7 @@ > else > { > uint32_t energy = acEnergyCu(curFrame, blockX, > blockY, param->internalCsp,param->rc.qgSize); > - qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - > (modeOneConst + 2 * (X265_DEPTH - 8))); > + qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - > (modeOneConst + 2 * (X265_DEPTH - 8))); > } > > if (param->bHDROpt) > @@ -308,6 +308,17 @@ > 
curFrame->m_lowres.wp_ssd[i] = ssd - (sum * sum + (width[i] * > height[i]) / 2) / (width[i] * height[i]); > } > } > + > + if (param->bDynamicRefine) > + { > + blockXY = 0; > + for (blockY = 0; blockY < maxRow; blockY += loopIncr) > + for (blockX = 0; blockX < maxCol; blockX += loopIncr) > + { > + curFrame->m_lowres.blockVariance[blockXY] = > acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize); > + blockXY++; > + } > + } > } > > void LookaheadTLD::lowresIntraEstimate(Lowres& fenc, uint32_t qgSize) > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > > Pushed.
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel