# HG changeset patch
# User Ashok Kumar Mishra <as...@multicorewareinc.com>
# Date 1496656244 -19800
#      Mon Jun 05 15:20:44 2017 +0530
# Node ID c04d02d71f206431b6b6e60460b81dcc85fc5db5
# Parent  de49a722b256d94c9ba30b5d88459026bea528b8
MV refinement for multipass encoding

diff -r de49a722b256 -r c04d02d71f20 doc/reST/cli.rst
--- a/doc/reST/cli.rst Wed May 24 20:01:59 2017 +0530
+++ b/doc/reST/cli.rst Mon Jun 05 15:20:44 2017 +0530
@@ -911,6 +911,13 @@
 	inter modes for blocks of size one smaller than the min-cu-size of the incoming
 	analysis data from the previous encode. Default disabled.
 
+.. option:: --refine-mv
+
+	Enables refinement of motion vector for scaled video. Evaluates the best
+	motion vector by searching the surrounding eight integer and subpel pixel
+	positions. Requires analysis load mode, refine-level 10 and a non-zero
+	scale-factor; otherwise it is disabled with a warning. Default disabled.
+
 Options which affect the transform unit quad-tree, sometimes referred to as
 the residual quad-tree (RQT).
 
diff -r de49a722b256 -r c04d02d71f20 source/CMakeLists.txt
--- a/source/CMakeLists.txt Wed May 24 20:01:59 2017 +0530
+++ b/source/CMakeLists.txt Mon Jun 05 15:20:44 2017 +0530
@@ -29,7 +29,7 @@
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
 # X265_BUILD must be incremented each time the public API is changed
-set(X265_BUILD 120)
+set(X265_BUILD 121)
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
                "${PROJECT_BINARY_DIR}/x265.def")
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
diff -r de49a722b256 -r c04d02d71f20 source/common/param.cpp
--- a/source/common/param.cpp Wed May 24 20:01:59 2017 +0530
+++ b/source/common/param.cpp Mon Jun 05 15:20:44 2017 +0530
@@ -280,6 +280,7 @@
     param->scaleFactor = 0;
     param->intraRefine = 0;
     param->interRefine = 0;
+    param->mvRefine = 0;
 }
 
 int x265_param_default_preset(x265_param* param, const char* preset, const char* tune)
@@ -963,6 +964,7 @@
         OPT("scale-factor") p->scaleFactor = atoi(value);
         OPT("refine-intra")p->intraRefine = atobool(value);
         OPT("refine-inter")p->interRefine = atobool(value);
+        OPT("refine-mv")p->mvRefine = atobool(value);
         else
             return X265_PARAM_BAD_NAME;
     }
@@ -1685,6 +1687,7 @@
     s += sprintf(s, " scale-factor=%d", p->scaleFactor);
     s += sprintf(s, " refine-intra=%d", p->intraRefine);
     s += sprintf(s, " refine-inter=%d", p->interRefine);
+    s += sprintf(s, " refine-mv=%d", p->mvRefine);
     BOOL(p->bLimitSAO, "limit-sao");
     s += sprintf(s, " ctu-info=%d", p->bCTUInfo);
 #undef BOOL
diff -r de49a722b256 -r c04d02d71f20 source/encoder/analysis.cpp
--- a/source/encoder/analysis.cpp Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/analysis.cpp Mon Jun 05 15:20:44 2017 +0530
@@ -2267,14 +2267,16 @@
                 int cuIdx = (mode.cu.m_cuAddr * parentCTU.m_numPartitions) + cuGeom.absPartIdx;
                 mode.cu.m_mergeFlag[pu.puAbsPartIdx] = interDataCTU->mergeFlag[cuIdx + part];
                 mode.cu.setPUInterDir(interDataCTU->interDir[cuIdx + part], pu.puAbsPartIdx, part);
-                for (int dir = 0; dir < m_slice->isInterB() + 1; dir++)
+                for (int list = 0; list < m_slice->isInterB() + 1; list++)
                 {
-                    mode.cu.setPUMv(dir, interDataCTU->mv[dir][cuIdx + part], pu.puAbsPartIdx, part);
-                    mode.cu.setPURefIdx(dir, interDataCTU->refIdx[dir][cuIdx + part], pu.puAbsPartIdx, part);
-                    mode.cu.m_mvpIdx[dir][pu.puAbsPartIdx] = interDataCTU->mvpIdx[dir][cuIdx + part];
+                    mode.cu.setPUMv(list, interDataCTU->mv[list][cuIdx + part], pu.puAbsPartIdx, part);
+                    mode.cu.setPURefIdx(list, interDataCTU->refIdx[list][cuIdx + part], pu.puAbsPartIdx, part);
+                    mode.cu.m_mvpIdx[list][pu.puAbsPartIdx] = interDataCTU->mvpIdx[list][cuIdx + part];
                 }
                 if (!mode.cu.m_mergeFlag[pu.puAbsPartIdx])
                 {
+                    if (m_param->mvRefine)
+                        m_me.setSourcePU(*mode.fencYuv, pu.ctuAddr, pu.cuAbsPartIdx, pu.puAbsPartIdx, pu.width, pu.height, m_param->searchMethod, m_param->subpelRefine, false);
                     //AMVP
                     MV mvc[(MD_ABOVE_LEFT + 1) * 2 + 2];
                     mode.cu.getNeighbourMV(part, pu.puAbsPartIdx, mode.interNeighbours);
@@ -2285,6 +2287,12 @@
                             continue;
                         mode.cu.getPMV(mode.interNeighbours, list, ref, mode.amvpCand[list][ref], mvc);
                         MV mvp = mode.amvpCand[list][ref][mode.cu.m_mvpIdx[list][pu.puAbsPartIdx]];
+                        if (m_param->mvRefine)
+                        {
+                            MV outmv;
+                            searchMV(mode, pu, list, ref, outmv);
+                            mode.cu.setPUMv(list, outmv, pu.puAbsPartIdx, part);
+                        }
                         mode.cu.m_mvd[list][pu.puAbsPartIdx] = mode.cu.m_mv[list][pu.puAbsPartIdx] - mvp;
                     }
                 }
@@ -2293,7 +2301,6 @@
                     MVField candMvField[MRG_MAX_NUM_CANDS][2]; // double length for mv of both lists
                     uint8_t candDir[MRG_MAX_NUM_CANDS];
                     mode.cu.getInterMergeCandidates(pu.puAbsPartIdx, part, candMvField, candDir);
-                    mode.cu.m_mvpIdx[0][pu.puAbsPartIdx] = interDataCTU->mvpIdx[0][cuIdx + part];
                     uint8_t mvpIdx = mode.cu.m_mvpIdx[0][pu.puAbsPartIdx];
                     mode.cu.setPUInterDir(candDir[mvpIdx], pu.puAbsPartIdx, part);
                     mode.cu.setPUMv(0, candMvField[mvpIdx][0].mv, pu.puAbsPartIdx, part);
diff -r de49a722b256 -r c04d02d71f20 source/encoder/encoder.cpp
--- a/source/encoder/encoder.cpp Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/encoder.cpp Mon Jun 05 15:20:44 2017 +0530
@@ -2310,6 +2310,15 @@
         x265_log(p, X265_LOG_WARNING, "Inter refinement does not support limitTU. Disabling limitTU.\n");
         p->limitTU = 0;
     }
+
+    if (p->mvRefine)
+    {
+        if (p->analysisMode != X265_ANALYSIS_LOAD || p->analysisRefineLevel < 10 || !p->scaleFactor)
+        {
+            x265_log(p, X265_LOG_WARNING, "MV refinement requires analysis load, refine-level 10, scale factor. 
Disabling MV refine.\n");
+            p->mvRefine = 0;
+        }
+    }
 
     if ((p->analysisMultiPassRefine || p->analysisMultiPassDistortion) && (p->bDistributeModeAnalysis || p->bDistributeMotionEstimation))
     {
diff -r de49a722b256 -r c04d02d71f20 source/encoder/motion.cpp
--- a/source/encoder/motion.cpp Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/motion.cpp Mon Jun 05 15:20:44 2017 +0530
@@ -598,6 +598,149 @@
     }
 }
 
+/* Refine an existing motion vector rather than running a full search.
+ *
+ * The start vector qmvp is clipped to the search window, the eight integer-pel
+ * positions surrounding it are evaluated, and the best candidate then goes
+ * through half-pel and quarter-pel refinement.
+ *
+ * ref          reference planes to search
+ * mvmin/mvmax  search window bounds (converted with toQPel() for subpel checks)
+ * qmvp         start vector; also handed to setMVP() before costs are computed
+ * outQMv       receives the refined motion vector in quarter-pel units */
+void MotionEstimate::refineMV(ReferencePlanes* ref,
+                              const MV& mvmin,
+                              const MV& mvmax,
+                              const MV& qmvp,
+                              MV& outQMv)
+{
+    ALIGN_VAR_16(int, costs[16]);
+    if (ctuAddr >= 0)
+        blockOffset = ref->reconPic->getLumaAddr(ctuAddr, absPartIdx) - ref->reconPic->getLumaAddr(0);
+    intptr_t stride = ref->lumaStride;
+    pixel* fenc = fencPUYuv.m_buf[0];
+    pixel* fref = ref->fpelPlane[0] + blockOffset;
+
+    setMVP(qmvp);
+
+    MV qmvmin = mvmin.toQPel();
+    MV qmvmax = mvmax.toQPel();
+
+    /* The term cost used here means satd/sad values for that particular search.
+     * The costs used in ME integer search only include the SAD cost of motion
+     * residual and sqrtLambda times MVD bits. The subpel refine steps use SATD
+     * cost of residual and sqrtLambda * MVD bits.
+     */
+
+    // measure SAD cost at clipped QPEL MVP
+    MV pmv = qmvp.clipped(qmvmin, qmvmax);
+    MV bestpre = pmv;
+    int bprecost;
+
+    bprecost = subpelCompare(ref, pmv, sad);
+
+    /* re-measure full pel rounded MVP with SAD as search start point */
+    MV bmv = pmv.roundToFPel();
+    int bcost = bprecost;
+    if (pmv.isSubpel())
+        bcost = sad(fenc, FENC_STRIDE, fref + bmv.x + bmv.y * stride, stride) + mvcost(bmv << 2);
+
+    /* square refine */
+    int dir = 0;
+    COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs);
+    if ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y))
+        COPY2_IF_LT(bcost, costs[0], dir, 1);
+    if ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y))
+        COPY2_IF_LT(bcost, costs[1], dir, 2);
+    COPY2_IF_LT(bcost, costs[2], dir, 3);
+    COPY2_IF_LT(bcost, costs[3], dir, 4);
+    COST_MV_X4_DIR(-1, -1, -1, 1, 1, -1, 1, 1, costs);
+    if ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y))
+        COPY2_IF_LT(bcost, costs[0], dir, 5);
+    if ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y))
+        COPY2_IF_LT(bcost, costs[1], dir, 6);
+    if ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= mvmax.y))
+        COPY2_IF_LT(bcost, costs[2], dir, 7);
+    if ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= mvmax.y))
+        COPY2_IF_LT(bcost, costs[3], dir, 8);
+    bmv += square1[dir];
+
+    if (bprecost < bcost)
+    {
+        bmv = bestpre;
+        bcost = bprecost;
+    }
+    else
+        bmv = bmv.toQPel(); // promote search bmv to qpel
+
+    // TODO: Change SubpelWorkload to fine tune MV
+    // Now it is set to 5 for experiment.
+    // const SubpelWorkload& wl = workload[this->subpelRefine];
+    const SubpelWorkload& wl = workload[5];
+
+    pixelcmp_t hpelcomp;
+
+    if (wl.hpel_satd)
+    {
+        bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
+        hpelcomp = satd;
+    }
+    else
+        hpelcomp = sad;
+
+    for (int iter = 0; iter < wl.hpel_iters; iter++)
+    {
+        int bdir = 0;
+        for (int i = 1; i <= wl.hpel_dirs; i++)
+        {
+            MV qmv = bmv + square1[i] * 2;
+
+            // check mv range for slice bound
+            if ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y))
+                continue;
+
+            int cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
+            COPY2_IF_LT(bcost, cost, bdir, i);
+        }
+
+        if (bdir)
+            bmv += square1[bdir] * 2;
+        else
+            break;
+    }
+
+    /* if HPEL search used SAD, remeasure with SATD before QPEL */
+    if (!wl.hpel_satd)
+        bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv);
+
+    for (int iter = 0; iter < wl.qpel_iters; iter++)
+    {
+        int bdir = 0;
+        for (int i = 1; i <= wl.qpel_dirs; i++)
+        {
+            MV qmv = bmv + square1[i];
+
+            // check mv range for slice bound
+            if ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y))
+                continue;
+
+            int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
+            COPY2_IF_LT(bcost, cost, bdir, i);
+        }
+
+        if (bdir)
+            bmv += square1[bdir];
+        else
+            break;
+    }
+
+    // check that the refined mv (not just the clipped start point) is within the slice bound
+    X265_CHECK(((bmv.y >= qmvmin.y) & (bmv.y <= qmvmax.y)), "mv beyond range!");
+
+    x265_emms();
+    outQMv = bmv;
+}
+
 int MotionEstimate::motionEstimate(ReferencePlanes *ref,
                                    const MV & mvmin,
                                    const MV & mvmax,
diff -r de49a722b256 -r c04d02d71f20 source/encoder/motion.h
--- a/source/encoder/motion.h Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/motion.h Mon Jun 05 15:20:44 2017 +0530
@@ -92,6 +92,7 @@
             chromaSatd(refYuv.getCrAddr(puPartIdx), refYuv.m_csize, fencPUYuv.m_buf[2], fencPUYuv.m_csize);
     }
 
+    void refineMV(ReferencePlanes* ref, const MV& mvmin, const MV& mvmax, const MV& qmvp, MV& outQMv);
     int motionEstimate(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, 
int merange, MV & outQMv, pixel *srcReferencePlane = 0);
 
     int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
diff -r de49a722b256 -r c04d02d71f20 source/encoder/search.cpp
--- a/source/encoder/search.cpp Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/search.cpp Mon Jun 05 15:20:44 2017 +0530
@@ -2108,6 +2108,20 @@
     }
 }
 
+/* Refine the motion vector currently stored in the CU for this PU and list.
+ * The stored vector is passed through clipMv(), setSearchRange() builds the
+ * window from m_param->searchRange, and m_me.refineMV() writes outmv. */
+void Search::searchMV(Mode& interMode, const PredictionUnit& pu, int list, int ref, MV& outmv)
+{
+    CUData& cuData = interMode.cu;
+    MV startMv = cuData.m_mv[list][pu.puAbsPartIdx];
+    cuData.clipMv(startMv);
+
+    MV rangeMin, rangeMax;
+    setSearchRange(cuData, startMv, m_param->searchRange, rangeMin, rangeMax);
+    m_me.refineMV(&m_slice->m_mref[list][ref], rangeMin, rangeMax, startMv, outmv);
+}
+
 /* find the best inter prediction for each PU of specified mode */
 void Search::predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t refMasks[2])
 {
diff -r de49a722b256 -r c04d02d71f20 source/encoder/search.h
--- a/source/encoder/search.h Wed May 24 20:01:59 2017 +0530
+++ b/source/encoder/search.h Mon Jun 05 15:20:44 2017 +0530
@@ -311,6 +311,7 @@
 
     // estimation inter prediction (non-skip)
     void predInterSearch(Mode& interMode, const CUGeom& cuGeom, bool bChromaMC, uint32_t masks[2]);
+    void searchMV(Mode& interMode, const PredictionUnit& pu, int list, int ref, MV& outmv);
     // encode residual and compute rd-cost for inter mode
     void encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom);
     void encodeResAndCalcRdSkipCU(Mode& interMode);
diff -r de49a722b256 -r c04d02d71f20 source/x265.h
--- a/source/x265.h Wed May 24 20:01:59 2017 +0530
+++ b/source/x265.h Mon Jun 05 15:20:44 2017 +0530
@@ -1449,6 +1449,9 @@
     /* Enable inter refinement in load mode*/
     int interRefine;
 
+    /* Enable motion vector refinement in load mode*/
+    int mvRefine;
+
 } x265_param;
 
 /* x265_param_alloc:
diff -r de49a722b256 -r c04d02d71f20 source/x265cli.h
--- a/source/x265cli.h Wed May 24 20:01:59 2017 +0530
+++ b/source/x265cli.h Mon Jun 05 15:20:44 2017 +0530
@@ -277,6 +277,8 @@
     { "dhdr10-info", required_argument, NULL, 0 },
     { "dhdr10-opt", no_argument, NULL, 0},
     { "no-dhdr10-opt", no_argument, NULL, 0},
+    { "refine-mv", no_argument, NULL, 0 },
+    { "no-refine-mv", no_argument, NULL, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 },
@@ -448,6 +450,7 @@
     H0(" --scale-factor <int> Specify factor by which input video is scaled down for analysis save mode. Default %d\n", param->scaleFactor);
     H0(" --[no-]refine-intra Enable intra refinement for load mode. Default %s\n", OPT(param->intraRefine));
     H0(" --[no-]refine-inter Enable inter refinement for load mode. Default %s\n", OPT(param->interRefine));
+    H0(" --[no-]refine-mv Enable mv refinement for load mode. Default %s\n", OPT(param->mvRefine));
     H0(" --aq-mode <integer> Mode for Adaptive Quantization - 0:none 1:uniform AQ 2:auto variance 3:auto variance with bias to dark scenes. Default %d\n", param->rc.aqMode);
     H0(" --aq-strength <float> Reduces blocking and blurring in flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength);
     H0(" --[no-]aq-motion Adaptive Quantization based on the relative motion of each CU w.r.t., frame. Default %s\n", OPT(param->bOptCUDeltaQP));
_______________________________________________
x265-devel mailing list
x265-devel@videolan.org
https://mailman.videolan.org/listinfo/x265-devel