Re: [x265] [PATCH 2 of 2] dynamic-refine: enable switching between inter refinement levels 0-3

Ashok Kumar Mishra Fri, 09 Mar 2018 07:16:47 -0800

On Fri, Mar 9, 2018 at 7:12 PM, <bha...@multicorewareinc.com> wrote:


> # HG changeset patch
> # User Bhavna Hariharan <bha...@multicorewareinc.com>
> # Date 1520595579 -19800
> #      Fri Mar 09 17:09:39 2018 +0530
> # Node ID e5425bd33176d6366f34d93e80f9cb1c9c4ebe6f
> # Parent  d292dacb81d8607ce0b2fb106b7383b360863e9d
> dynamic-refine: enable switching between inter refinement levels 0-3
> based on the content and the encoder properties.
>
> The algorithm has 2 parts
> 1) Training - Encode frames with refine-inter 3 and calculate the
> corresponding feature values until the values saturate. Training restarts
> when a scenecut is encountered.
> 2) Classification - Based on the prior probability calculated from the
> training data and the feature metric of the current CU, an appropriate
> refine-inter level is chosen for the CU.
>
> diff -r d292dacb81d8 -r e5425bd33176 source/common/cudata.cpp
> --- a/source/common/cudata.cpp  Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/common/cudata.cpp  Fri Mar 09 17:09:39 2018 +0530
> @@ -317,6 +317,16 @@
>      m_cuAboveLeft = (m_cuLeft && m_cuAbove) ?
> m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
>      m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU -
> 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
>      memset(m_distortion, 0, m_numPartitions * sizeof(sse_t));
> +
> +    if (m_encData->m_param->bDynamicRefine)
> +    {
> +        int size = m_encData->m_param->maxCUDepth *
> X265_REFINE_INTER_LEVELS;
> +        CHECKED_MALLOC_ZERO(m_collectCURd, uint64_t, size);
> +        CHECKED_MALLOC_ZERO(m_collectCUVariance, uint32_t, size);
> +        CHECKED_MALLOC_ZERO(m_collectCUCount, uint32_t, size);
> +    }
> +fail:
> +    return;
>  }
>
>  // initialize Sub partition
> diff -r d292dacb81d8 -r e5425bd33176 source/common/cudata.h
> --- a/source/common/cudata.h    Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/common/cudata.h    Fri Mar 09 17:09:39 2018 +0530
> @@ -224,6 +224,11 @@
>      uint64_t      m_fAc_den[3];
>      uint64_t      m_fDc_den[3];
>
> +    /* Feature values per CTU for dynamic refinement */
> +    uint64_t*       m_collectCURd;
> +    uint32_t*       m_collectCUVariance;
> +    uint32_t*       m_collectCUCount;
> +
>      CUData();
>
>      void     initialize(const CUDataMemPool& dataPool, uint32_t depth,
> const x265_param& param, int instance);
> diff -r d292dacb81d8 -r e5425bd33176 source/common/frame.cpp
> --- a/source/common/frame.cpp   Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/common/frame.cpp   Fri Mar 09 17:09:39 2018 +0530
> @@ -53,6 +53,7 @@
>      m_addOnDepth = NULL;
>      m_addOnCtuInfo = NULL;
>      m_addOnPrevChange = NULL;
> +    m_classifyFrame = false;
>  }
>
>  bool Frame::create(x265_param *param, float* quantOffsets)
> @@ -85,6 +86,14 @@
>          m_analysis2Pass.analysisFramedata = NULL;
>      }
>
> +    if (param->bDynamicRefine)
> +    {
> +        int size = m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
> +        CHECKED_MALLOC_ZERO(m_classifyRd, uint64_t, size);
> +        CHECKED_MALLOC_ZERO(m_classifyVariance, uint64_t, size);
> +        CHECKED_MALLOC_ZERO(m_classifyCount, uint32_t, size);
> +    }
> +
>      if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) &&
> m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode ||
> !!param->bAQMotion, param->rc.qgSize))
>      {
>          X265_CHECK((m_reconColCount == NULL), "m_reconColCount was
> initialized");
> @@ -226,4 +235,11 @@
>      }
>      m_lowres.destroy();
>      X265_FREE(m_rcData);
> +
> +    if (m_param->bDynamicRefine)
> +    {
> +        X265_FREE_ZERO(m_classifyRd);
> +        X265_FREE_ZERO(m_classifyVariance);
> +        X265_FREE_ZERO(m_classifyCount);
> +    }
>  }
> diff -r d292dacb81d8 -r e5425bd33176 source/common/frame.h
> --- a/source/common/frame.h     Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/common/frame.h     Fri Mar 09 17:09:39 2018 +0530
> @@ -122,6 +122,14 @@
>      uint8_t**              m_addOnDepth;
>      uint8_t**              m_addOnCtuInfo;
>      int**                  m_addOnPrevChange;
> +
> +    /* Average feature values of frames being considered for
> classification */
> +    uint64_t*              m_classifyRd;
> +    uint64_t*              m_classifyVariance;
> +    uint32_t*              m_classifyCount;
> +
> +    bool                   m_classifyFrame;
> +
>      Frame();
>
>      bool create(x265_param *param, float* quantOffsets);
> diff -r d292dacb81d8 -r e5425bd33176 source/common/lowres.cpp
> --- a/source/common/lowres.cpp  Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/common/lowres.cpp  Fri Mar 09 17:09:39 2018 +0530
> @@ -59,10 +59,12 @@
>          CHECKED_MALLOC_ZERO(qpAqMotionOffset, double, cuCountFullRes);
>          CHECKED_MALLOC_ZERO(invQscaleFactor, int, cuCountFullRes);
>          CHECKED_MALLOC_ZERO(qpCuTreeOffset, double, cuCountFullRes);
> -        CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes);
>          if (qgSize == 8)
>              CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount);
>      }
> +
> +    if (origPic->m_param->bDynamicRefine)
> +        CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes);
>      CHECKED_MALLOC(propagateCost, uint16_t, cuCount);
>
>      /* allocate lowres buffers */
> diff -r d292dacb81d8 -r e5425bd33176 source/encoder/analysis.cpp
> --- a/source/encoder/analysis.cpp       Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/encoder/analysis.cpp       Fri Mar 09 17:09:39 2018 +0530
> @@ -1184,7 +1184,7 @@
>
>          if (m_evaluateInter)
>          {
> -            if (m_param->interRefine == 2)
> +            if (m_refineLevel == 2)
>              {
>                  if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP)
>                      skipModes = true;
> @@ -1307,7 +1307,7 @@
>              md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
>              checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
>              if (m_param->rdLevel)
> -                skipModes = (m_param->bEnableEarlySkip ||
> m_param->interRefine == 2)
> +                skipModes = (m_param->bEnableEarlySkip || m_refineLevel
> == 2)
>                  && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO:
> sa8d threshold per depth
>          }
>          if (md.bestMode && m_param->bEnableRecursionSkip &&
> !bCtuInfoCheck && !(m_param->bMVType && m_param->analysisReuseLevel == 7 &&
> (m_modeFlag[0] || m_modeFlag[1])))
> @@ -1874,7 +1874,7 @@
>
>          if (m_evaluateInter)
>          {
> -            if (m_param->interRefine == 2)
> +            if (m_refineLevel == 2)
>              {
>                  if (parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP)
>                      skipModes = true;
> @@ -2004,7 +2004,7 @@
>              md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
>              md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp);
>              checkMerge2Nx2N_rd5_6(md.pred[PRED_SKIP],
> md.pred[PRED_MERGE], cuGeom);
> -            skipModes = (m_param->bEnableEarlySkip ||
> m_param->interRefine == 2) &&
> +            skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2)
> &&
>                  md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
>              refMasks[0] = allSplitRefs;
>              md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp);
> @@ -2413,7 +2413,16 @@
>      bool mightNotSplit = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
>      bool bDecidedDepth = parentCTU.m_cuDepth[cuGeom.absPartIdx] == depth;
>
> -    int split = (m_param->interRefine && cuGeom.log2CUSize ==
> (uint32_t)(g_log2Size[m_param->minCUSize] + 1) && bDecidedDepth);
> +    TrainingData td;
> +    td.init(parentCTU, cuGeom);
> +
> +    if (!m_param->bDynamicRefine)
> +        m_refineLevel = m_param->interRefine;
> +    else
> +        m_refineLevel = m_frame->m_classifyFrame ? 0 : 3;
> +
> +    int split = (m_refineLevel && cuGeom.log2CUSize ==
> (uint32_t)(g_log2Size[m_param->minCUSize] + 1) && bDecidedDepth);
> +    td.split = split;
>
>      if (bDecidedDepth)
>      {
> @@ -2423,7 +2432,7 @@
>          md.bestMode = &mode;
>          mode.cu.initSubCU(parentCTU, cuGeom, qp);
>          PartSize size = (PartSize)parentCTU.m_
> partSize[cuGeom.absPartIdx];
> -        if (parentCTU.isIntra(cuGeom.absPartIdx) && m_param->interRefine
> < 2)
> +        if (parentCTU.isIntra(cuGeom.absPartIdx) && m_refineLevel < 2)
>          {
>              if (m_param->intraRefine == 4)
>                  compressIntraCU(parentCTU, cuGeom, qp);
> @@ -2439,7 +2448,7 @@
>                  checkIntra(mode, cuGeom, size);
>              }
>          }
> -        else if (!parentCTU.isIntra(cuGeom.absPartIdx) &&
> m_param->interRefine < 2)
> +        else if (!parentCTU.isIntra(cuGeom.absPartIdx) && m_refineLevel
> < 2)
>          {
>              mode.cu.copyFromPic(parentCTU, cuGeom, m_csp, false);
>              uint32_t numPU = parentCTU.getNumPartInter(
> cuGeom.absPartIdx);
> @@ -2501,7 +2510,7 @@
>                  }
>                  motionCompensation(mode.cu, pu, mode.predYuv, true,
> (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400));
>              }
> -            if (!m_param->interRefine && parentCTU.isSkipped(cuGeom.
> absPartIdx))
> +            if (!m_param->interRefine && !m_param->bDynamicRefine &&
> parentCTU.isSkipped(cuGeom.absPartIdx))
>                  encodeResAndCalcRdSkipCU(mode);
>              else
>                  encodeResAndCalcRdInterCU(mode, cuGeom);
> @@ -2512,7 +2521,7 @@
>                  checkDQP(mode, cuGeom);
>          }
>
> -        if (m_param->interRefine < 2)
> +        if (m_refineLevel < 2)
>          {
>              if (m_bTryLossless)
>                  tryLossless(cuGeom);
> @@ -2540,7 +2549,10 @@
>              }
>          }
>
> -        if (m_param->interRefine > 1 || (m_param->interRefine &&
> parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP  &&
> !mode.cu.isSkipped(0)))
> +        if (m_param->bDynamicRefine)
> +            classifyCU(parentCTU,cuGeom, *md.bestMode, td);
> +
> +        if (m_refineLevel > 1 || (m_refineLevel &&
> parentCTU.m_predMode[cuGeom.absPartIdx] == MODE_SKIP  &&
> !mode.cu.isSkipped(0)))
>          {
>              m_evaluateInter = 1;
>              m_param->rdLevel > 4 ? compressInterCU_rd5_6(parentCTU,
> cuGeom, qp) : compressInterCU_rd0_4(parentCTU, cuGeom, qp);
> @@ -2599,7 +2611,7 @@
>          else
>              updateModeCost(*splitPred);
>
> -        if (m_param->interRefine)
> +        if (m_refineLevel)
>          {
>              if (m_param->rdLevel > 1)
>                  checkBestMode(*splitPred, cuGeom.depth);
> @@ -2613,6 +2625,83 @@
>          md.bestMode->cu.copyToPic(depth);
>          md.bestMode->reconYuv.copyToPicYuv(*m_frame->m_reconPic,
> parentCTU.m_cuAddr, cuGeom.absPartIdx);
>      }
> +    if (m_param->bDynamicRefine && bDecidedDepth)
> +        trainCU(parentCTU, cuGeom, *md.bestMode, td);
> +}
> +
> +void Analysis::classifyCU(const CUData& ctu, const CUGeom& cuGeom, const
> Mode& bestMode, TrainingData& trainData)
> +{
> +    uint32_t depth = cuGeom.depth;
> +    trainData.cuVariance = calculateCUVariance(ctu, cuGeom);
> +    if (m_frame->m_classifyFrame)
> +    {
> +        uint64_t diffRefine[X265_REFINE_INTER_LEVELS];
> +        uint64_t diffRefineRd[X265_REFINE_INTER_LEVELS];
> +        float probRefine[X265_REFINE_INTER_LEVELS] = { 0 };
> +        uint8_t varRefineLevel = 0;
> +        uint8_t rdRefineLevel = 0;
> +        uint64_t cuCost = bestMode.rdCost;
> +
> +        int offset = (depth * X265_REFINE_INTER_LEVELS) + 1;
> +        if (cuCost < m_frame->m_classifyRd[offset])
> +            m_refineLevel = 1;
> +        else
> +        {
> +            uint64_t trainingCount = 0;
> +            for (uint8_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
> +            {
> +                offset = (depth * X265_REFINE_INTER_LEVELS) + i;
> +                trainingCount += m_frame->m_classifyCount[offset];
> +            }
> +            for (uint8_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
> +            {
> +                offset = (depth * X265_REFINE_INTER_LEVELS) + i;
> +                /* Calculate distance values */
> +                diffRefine[i] = abs((int64_t)(trainData.cuVariance -
> m_frame->m_classifyVariance[offset]));
> +                diffRefineRd[i] = abs((int64_t)(cuCost -
> m_frame->m_classifyRd[offset]));
> +
> +                /* Calculate prior probability - ranges between 0 and 1 */
> +                if (trainingCount)
> +                    probRefine[i] = ((float)m_frame->m_classifyCount[offset]
> / (float)trainingCount);
> +
> +                /* Bayesian classification - P(c|x)P(x) = P(x|c)P(c)
> +                P(c|x) is the posterior probability of class given
> predictor.
> +                P(c) is the prior probability of class.
> +                P(x|c) is the likelihood which is the probability of
> predictor given class.
> +                P(x) is the prior probability of predictor.*/
> +                if ((diffRefine[i] * probRefine[m_refineLevel]) <
> (diffRefine[m_refineLevel] * probRefine[i]))
> +                    varRefineLevel = i;
> +                if ((diffRefineRd[i] * probRefine[m_refineLevel]) <
> (diffRefineRd[m_refineLevel] * probRefine[i]))
> +                    rdRefineLevel = i;
> +            }
> +            m_refineLevel = X265_MAX(varRefineLevel, rdRefineLevel);
> +        }
> +    }
> +}
> +
> +void Analysis::trainCU(const CUData& ctu, const CUGeom& cuGeom, const
> Mode& bestMode, TrainingData& trainData)
> +{
> +    uint32_t depth = cuGeom.depth;
> +    int classify = 0;
> +    if (!m_frame->m_classifyFrame)
> +    {
> +        if (trainData.predMode == ctu.m_predMode[cuGeom.absPartIdx] &&
> trainData.partSize == ctu.m_partSize[cuGeom.absPartIdx]
> +            && trainData.mergeFlag == ctu.m_mergeFlag[cuGeom.absPartIdx])
> +            classify = 0;
> +        else if ((depth == m_param->maxCUDepth - 1) && trainData.split)
> +            classify = 1;
> +        else if (trainData.partSize == SIZE_2Nx2N && trainData.partSize
> == ctu.m_partSize[cuGeom.absPartIdx])
> +            classify = 2;
> +        else
> +            classify = 3;
> +    }
> +    else
> +        classify = m_refineLevel;
> +    uint64_t cuCost = bestMode.rdCost;
> +    int offset = (depth * X265_REFINE_INTER_LEVELS) + classify;
> +    ctu.m_collectCURd[offset] += cuCost;
> +    ctu.m_collectCUVariance[offset] += trainData.cuVariance;
> +    ctu.m_collectCUCount[offset]++;
>  }
>
>  /* sets md.bestMode if a valid merge candidate is found, else leaves it
> NULL */
> @@ -3414,6 +3503,33 @@
>      return false;
>  }
>
> +uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom&
> cuGeom)
> +{
> +    uint32_t cuVariance = 0;
> +    uint32_t *blockVariance = m_frame->m_lowres.blockVariance;
> +    int loopIncr = (m_param->rc.qgSize == 8) ? 8 : 16;
> +
> +    uint32_t width = m_frame->m_fencPic->m_picWidth;
> +    uint32_t height = m_frame->m_fencPic->m_picHeight;
> +    uint32_t block_x = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
> +    uint32_t block_y = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
> +    uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (loopIncr - 1))
> / loopIncr;
> +    uint32_t blockSize = m_param->maxCUSize >> cuGeom.depth;
> +    uint32_t cnt = 0;
> +
> +    for (uint32_t block_yy = block_y; block_yy < block_y + blockSize &&
> block_yy < height; block_yy += loopIncr)
> +    {
> +        for (uint32_t block_xx = block_x; block_xx < block_x + blockSize
> && block_xx < width; block_xx += loopIncr)
> +        {
> +            uint32_t idx = ((block_yy / loopIncr) * (maxCols)) +
> (block_xx / loopIncr);
> +            cuVariance += blockVariance[idx];
> +            cnt++;
> +        }
> +    }
> +
> +    return cuVariance / cnt;
> +}
> +
>  int Analysis::calculateQpforCuSize(const CUData& ctu, const CUGeom&
> cuGeom, int32_t complexCheck, double baseQp)
>  {
>      FrameData& curEncData = *m_frame->m_encData;
> diff -r d292dacb81d8 -r e5425bd33176 source/encoder/analysis.h
> --- a/source/encoder/analysis.h Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/encoder/analysis.h Fri Mar 09 17:09:39 2018 +0530
> @@ -142,8 +142,29 @@
>      uint8_t*                m_multipassModes;
>
>      uint8_t                 m_evaluateInter;
> +    int32_t                 m_refineLevel;
> +
>      uint8_t*                m_additionalCtuInfo;
>      int*                    m_prevCtuInfoChange;
> +
> +    struct TrainingData
> +    {
> +        uint32_t cuVariance;
> +        uint8_t predMode;
> +        uint8_t partSize;
> +        uint8_t mergeFlag;
> +        int split;
> +
> +        void init(const CUData& parentCTU, const CUGeom& cuGeom)
> +        {
> +            cuVariance = 0;
> +            predMode = parentCTU.m_predMode[cuGeom.absPartIdx];
> +            partSize = parentCTU.m_partSize[cuGeom.absPartIdx];
> +            mergeFlag = parentCTU.m_mergeFlag[cuGeom.absPartIdx];
> +            split = 0;
> +        }
> +    };
> +
>      /* refine RD based on QP for rd-levels 5 and 6 */
>      void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom,
> int32_t qp, int32_t lqp);
>
> @@ -182,6 +203,10 @@
>      void encodeResidue(const CUData& parentCTU, const CUGeom& cuGeom);
>
>      int calculateQpforCuSize(const CUData& ctu, const CUGeom& cuGeom,
> int32_t complexCheck = 0, double baseQP = -1);
> +    uint32_t calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom);
> +
> +    void classifyCU(const CUData& ctu, const CUGeom& cuGeom, const Mode&
> bestMode, TrainingData& trainData);
> +    void trainCU(const CUData& ctu, const CUGeom& cuGeom, const Mode&
> bestMode, TrainingData& trainData);
>
>      void calculateNormFactor(CUData& ctu, int qp);
>      void normFactor(const pixel* src, uint32_t blockSize, CUData& ctu,
> int qp, TextType ttype);
> diff -r d292dacb81d8 -r e5425bd33176 source/encoder/encoder.cpp
> --- a/source/encoder/encoder.cpp        Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/encoder/encoder.cpp        Fri Mar 09 17:09:39 2018 +0530
> @@ -96,6 +96,7 @@
>  #endif
>
>      m_prevTonemapPayload.payload = NULL;
> +    m_startPoint = 0;
>  }
>  inline char *strcatFilename(const char *input, const char *suffix)
>  {
> @@ -412,6 +413,17 @@
>      if (m_bToneMap)
>          m_numCimInfo = m_hdr10plus_api->hdr10plus_
> json_to_movie_cim(m_param->toneMapFile, m_cim);
>  #endif
> +
> +    if (m_param->bDynamicRefine)
> +    {
> +        int size = m_param->totalFrames * m_param->maxCUDepth *
> X265_REFINE_INTER_LEVELS;
> +        CHECKED_MALLOC_ZERO(m_variance, uint64_t, size);
> +        CHECKED_MALLOC_ZERO(m_rdCost, uint64_t, size);
> +        CHECKED_MALLOC_ZERO(m_trainingCount, uint32_t, size);
> +        return;
> +    fail:
> +        m_aborted = true;
> +    }
>  }
>
>  void Encoder::stopJobs()
> @@ -697,7 +709,13 @@
>      if (m_bToneMap)
>          m_hdr10plus_api->hdr10plus_clear_movie(m_cim, m_numCimInfo);
>  #endif
> -
> +
> +    if (m_param->bDynamicRefine)
> +    {
> +        X265_FREE(m_variance);
> +        X265_FREE(m_rdCost);
> +        X265_FREE(m_trainingCount);
> +    }
>      if (m_exportedPic)
>      {
>          ATOMIC_DEC(&m_exportedPic->m_countRefEncoders);
> diff -r d292dacb81d8 -r e5425bd33176 source/encoder/encoder.h
> --- a/source/encoder/encoder.h  Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/encoder/encoder.h  Fri Mar 09 17:09:39 2018 +0530
> @@ -221,6 +221,13 @@
>
>      x265_sei_payload        m_prevTonemapPayload;
>
> +    /* Collect frame level feature data */
> +    uint64_t*               m_rdCost;
> +    uint64_t*               m_variance;
> +    uint32_t*               m_trainingCount;
> +    int32_t                 m_startPoint;
> +    Lock                    m_dynamicRefineLock;
> +
>      Encoder();
>      ~Encoder()
>      {
> diff -r d292dacb81d8 -r e5425bd33176 source/encoder/frameencoder.cpp
> --- a/source/encoder/frameencoder.cpp   Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/encoder/frameencoder.cpp   Fri Mar 09 17:09:39 2018 +0530
> @@ -736,6 +736,9 @@
>              m_top->m_rateControl->m_startEndOrder.incr(); // faked
> rateControlEnd calls for negative frames
>      }
>
> +    if (m_param->bDynamicRefine)
> +        computeAvgTrainingData();
> +
>      /* Analyze CTU rows, most of the hard work is done here.  Frame is
>       * compressed in a wave-front pattern if WPP is enabled. Row based
> loop
>       * filters runs behind the CTU compression and reconstruction */
> @@ -1457,6 +1460,30 @@
>          // Does all the CU analysis, returns best top level mode decision
>          Mode& best = tld.analysis.compressCTU(*ctu, *m_frame,
> m_cuGeoms[m_ctuGeomMap[cuAddr]], rowCoder);
>
> +        if (m_param->bDynamicRefine)
> +        {
> +            {
> +                ScopedLock dynLock(m_top->m_dynamicRefineLock);
> +                for (uint32_t i = 0; i < X265_REFINE_INTER_LEVELS; i++)
> +                {
> +                    for (uint32_t depth = 0; depth < m_param->maxCUDepth;
> depth++)
> +                    {
> +                        int offset = (depth * X265_REFINE_INTER_LEVELS) +
> i;
> +                        int index = (m_frame->m_encodeOrder *
> X265_REFINE_INTER_LEVELS * m_param->maxCUDepth) + offset;
> +                        if (ctu->m_collectCUCount[offset])
> +                        {
> +                            m_top->m_variance[index] +=
> ctu->m_collectCUVariance[offset];
> +                            m_top->m_rdCost[index] +=
> ctu->m_collectCURd[offset];
> +                            m_top->m_trainingCount[index] +=
> ctu->m_collectCUCount[offset];
> +                        }
> +                    }
> +                }
> +            }
> +            X265_FREE_ZERO(ctu->m_collectCUVariance);
> +            X265_FREE_ZERO(ctu->m_collectCURd);
> +            X265_FREE_ZERO(ctu->m_collectCUCount);
> +        }
> +
>          // take a sample of the current active worker count
>          ATOMIC_ADD(&m_totalActiveWorkerCount, m_activeWorkerCount);
>          ATOMIC_INC(&m_activeWorkerCountSamples);
> @@ -1839,6 +1866,58 @@
>          m_completionEvent.trigger();
>  }
>
> +void FrameEncoder::computeAvgTrainingData()
> +{
> +    if (m_frame->m_lowres.bScenecut)
> +        m_top->m_startPoint = m_frame->m_encodeOrder;
> +
> +    if (m_frame->m_encodeOrder - m_top->m_startPoint < 2 *
> m_param->frameNumThreads)
> +        m_frame->m_classifyFrame = false;
> +    else
> +        m_frame->m_classifyFrame = true;
> +
> +    int size = m_param->maxCUDepth * X265_REFINE_INTER_LEVELS;
> +    memset(m_frame->m_classifyRd, 0, size * sizeof(uint64_t));
> +    memset(m_frame->m_classifyVariance, 0, size * sizeof(uint64_t));
> +    memset(m_frame->m_classifyCount, 0, size * sizeof(uint32_t));
> +
> +    if (m_frame->m_classifyFrame)
> +    {
> +        uint32_t limit = m_frame->m_encodeOrder -
> m_param->frameNumThreads - 1;
> +        for (uint32_t i = m_top->m_startPoint + 1; i < limit; i++)
> +        {
> +            for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++)
> +            {
> +                for (uint32_t depth = 0; depth < m_param->maxCUDepth;
> depth++)
> +                {
> +                    int offset = (depth * X265_REFINE_INTER_LEVELS) + j;
> +                    int index = (i* X265_REFINE_INTER_LEVELS *
> m_param->maxCUDepth) + offset;
> +                    if (m_top->m_trainingCount[index])
> +                    {
> +                        m_frame->m_classifyRd[offset] +=
> m_top->m_rdCost[index] / m_top->m_trainingCount[index];
> +                        m_frame->m_classifyVariance[offset] +=
> m_top->m_variance[index] / m_top->m_trainingCount[index];
> +                        m_frame->m_classifyCount[offset] +=
> m_top->m_trainingCount[index];
> +                    }
> +                }
> +            }
> +        }
> +        /* Calculates the average feature values of historic frames that
> are being considered for the current frame */
> +        int historyCount = m_frame->m_encodeOrder -
> m_param->frameNumThreads - m_top->m_startPoint - 1;
> +        if (historyCount)
> +        {
> +            for (uint32_t j = 0; j < X265_REFINE_INTER_LEVELS; j++)
> +            {
> +                for (uint32_t depth = 0; depth < m_param->maxCUDepth;
> depth++)
> +                {
> +                    int offset = (depth * X265_REFINE_INTER_LEVELS) + j;
> +                    m_frame->m_classifyRd[offset] /= historyCount;
> +                    m_frame->m_classifyVariance[offset] /= historyCount;
> +                }
> +            }
> +        }
> +    }
> +}
> +
>  /* collect statistics about CU coding decisions, return total QP */
>  int FrameEncoder::collectCTUStatistics(const CUData& ctu, FrameStats*
> log)
>  {
> diff -r d292dacb81d8 -r e5425bd33176 source/encoder/frameencoder.h
> --- a/source/encoder/frameencoder.h     Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/encoder/frameencoder.h     Fri Mar 09 17:09:39 2018 +0530
> @@ -230,6 +230,7 @@
>      void threadMain();
>      int  collectCTUStatistics(const CUData& ctu, FrameStats* frameLog);
>      void noiseReductionUpdate();
> +    void computeAvgTrainingData();
>
>      /* Called by WaveFront::findJob() */
>      virtual void processRow(int row, int threadId);
> diff -r d292dacb81d8 -r e5425bd33176 source/encoder/slicetype.cpp
> --- a/source/encoder/slicetype.cpp      Fri Mar 09 09:44:53 2018 +0530
> +++ b/source/encoder/slicetype.cpp      Fri Mar 09 17:09:39 2018 +0530
> @@ -178,12 +178,12 @@
>              }
>          }
>
> -        /* Need variance data for weighted prediction */
> +        /* Need variance data for weighted prediction and dynamic
> refinement*/
>          if (param->bEnableWeightedPred || param->bEnableWeightedBiPred)
> -        {
> +        {
>              for (blockY = 0; blockY < maxRow; blockY += loopIncr)
> -                for (blockX = 0; blockX < maxCol; blockX += loopIncr)
> -                    acEnergyCu(curFrame, blockX, blockY,
> param->internalCsp, param->rc.qgSize);
> +                for (blockX = 0; blockX < maxCol; blockX += loopIncr)
> +                    acEnergyCu(curFrame, blockX, blockY,
> param->internalCsp, param->rc.qgSize);
>          }
>      }
>      else
> @@ -240,7 +240,7 @@
>                  else
>                  {
>                      uint32_t energy = acEnergyCu(curFrame, blockX,
> blockY, param->internalCsp,param->rc.qgSize);
> -                    qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) -
> (modeOneConst + 2 * (X265_DEPTH - 8)));
> +                    qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) -
> (modeOneConst + 2 * (X265_DEPTH - 8)));
>                  }
>
>                  if (param->bHDROpt)
> @@ -308,6 +308,17 @@
>              curFrame->m_lowres.wp_ssd[i] = ssd - (sum * sum + (width[i] *
> height[i]) / 2) / (width[i] * height[i]);
>          }
>      }
> +
> +    if (param->bDynamicRefine)
> +    {
> +        blockXY = 0;
> +        for (blockY = 0; blockY < maxRow; blockY += loopIncr)
> +            for (blockX = 0; blockX < maxCol; blockX += loopIncr)
> +            {
> +                curFrame->m_lowres.blockVariance[blockXY] =
> acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
> +                blockXY++;
> +            }
> +    }
>  }
>
>  void LookaheadTLD::lowresIntraEstimate(Lowres& fenc, uint32_t qgSize)
>
> _______________________________________________
> x265-devel mailing list
> x265-devel@videolan.org
> https://mailman.videolan.org/listinfo/x265-devel
>
>
Pushed.

_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel

Re: [x265] [PATCH 2 of 2] dynamic-refine: enable switching between inter refinement levels 0-3

Reply via email to