Pushed to default. On Wed, Jul 10, 2019 at 9:47 AM <[email protected]> wrote:
> # HG changeset patch > # User Pooja Venkatesan <[email protected]> > # Date 1562305646 -19800 > # Fri Jul 05 11:17:26 2019 +0530 > # Node ID 14a235657a2011aa28d45544f33b7186c33b9218 > # Parent 4f6dde51a5db4f9229bddb60db176f16ac98f505 > motion: Implement 3-level Hierarchical Motion Estimation > > This patch does the following: > 1) Create HME-level 0 planes > 2) Add option "--hme" and "--hme-search" to enable HME > and to select search method for levels 0, 1 and 2 > > diff -r 4f6dde51a5db -r 14a235657a20 doc/reST/cli.rst > --- a/doc/reST/cli.rst Fri Jul 05 10:47:15 2019 +0530 > +++ b/doc/reST/cli.rst Fri Jul 05 11:17:26 2019 +0530 > @@ -1261,6 +1261,18 @@ > Enable motion estimation with source frame pixels, in this mode, > motion estimation can be computed independently. Default disabled. > > +.. option:: --hme, --no-hme > + > + Enable 3-level Hierarchical motion estimation at One-Sixteenth, > + Quarter and Full resolution. Default disabled. > + > +.. option:: --hme-search > <integer|string>,<integer|string>,<integer|string> > + > + Motion search method for HME Level 0, 1 and 2. Refer to > :option:`--me` for values. > + Specify search method for each level. Alternatively, specify a > single value > + which will apply to all levels. Default is hex,umh,umh for > + levels 0,1,2 respectively. 
> + > Spatial/intra options > ===================== > > diff -r 4f6dde51a5db -r 14a235657a20 source/CMakeLists.txt > --- a/source/CMakeLists.txt Fri Jul 05 10:47:15 2019 +0530 > +++ b/source/CMakeLists.txt Fri Jul 05 11:17:26 2019 +0530 > @@ -29,7 +29,7 @@ > option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) > mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) > # X265_BUILD must be incremented each time the public API is changed > -set(X265_BUILD 176) > +set(X265_BUILD 177) > configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" > "${PROJECT_BINARY_DIR}/x265.def") > configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" > diff -r 4f6dde51a5db -r 14a235657a20 source/common/lowres.cpp > --- a/source/common/lowres.cpp Fri Jul 05 10:47:15 2019 +0530 > +++ b/source/common/lowres.cpp Fri Jul 05 11:17:26 2019 +0530 > @@ -55,6 +55,7 @@ > heightFullRes = origPic->m_picHeight; > width = origPic->m_picWidth / 2; > lines = origPic->m_picHeight / 2; > + bEnableHME = param->bEnableHME ? 
1 : 0; > lumaStride = width + 2 * origPic->m_lumaMarginX; > if (lumaStride & 31) > lumaStride += 32 - (lumaStride & 31); > @@ -137,6 +138,26 @@ > lowresPlane[2] = buffer[2] + padoffset; > lowresPlane[3] = buffer[3] + padoffset; > > + if (bEnableHME) > + { > + intptr_t lumaStrideHalf = lumaStride / 2; > + if (lumaStrideHalf & 31) > + lumaStrideHalf += 32 - (lumaStrideHalf & 31); > + size_t planesizeHalf = planesize / 2; > + size_t padoffsetHalf = padoffset / 2; > + /* allocate lower-res buffers */ > + CHECKED_MALLOC_ZERO(lowerResBuffer[0], pixel, 4 * planesizeHalf); > + > + lowerResBuffer[1] = lowerResBuffer[0] + planesizeHalf; > + lowerResBuffer[2] = lowerResBuffer[1] + planesizeHalf; > + lowerResBuffer[3] = lowerResBuffer[2] + planesizeHalf; > + > + lowerResPlane[0] = lowerResBuffer[0] + padoffsetHalf; > + lowerResPlane[1] = lowerResBuffer[1] + padoffsetHalf; > + lowerResPlane[2] = lowerResBuffer[2] + padoffsetHalf; > + lowerResPlane[3] = lowerResBuffer[3] + padoffsetHalf; > + } > + > CHECKED_MALLOC(intraCost, int32_t, cuCount); > CHECKED_MALLOC(intraMode, uint8_t, cuCount); > > @@ -166,6 +187,8 @@ > void Lowres::destroy() > { > X265_FREE(buffer[0]); > + if(bEnableHME) > + X265_FREE(lowerResBuffer[0]); > X265_FREE(intraCost); > X265_FREE(intraMode); > > @@ -253,5 +276,18 @@ > extendPicBorder(lowresPlane[1], lumaStride, width, lines, > origPic->m_lumaMarginX, origPic->m_lumaMarginY); > extendPicBorder(lowresPlane[2], lumaStride, width, lines, > origPic->m_lumaMarginX, origPic->m_lumaMarginY); > extendPicBorder(lowresPlane[3], lumaStride, width, lines, > origPic->m_lumaMarginX, origPic->m_lumaMarginY); > + > + if (origPic->m_param->bEnableHME) > + { > + primitives.frameInitLowerRes(lowresPlane[0], > + lowerResPlane[0], lowerResPlane[1], lowerResPlane[2], > lowerResPlane[3], > + lumaStride, lumaStride/2, (width / 2), (lines / 2)); > + extendPicBorder(lowerResPlane[0], lumaStride/2, width/2, lines/2, > origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); > + 
extendPicBorder(lowerResPlane[1], lumaStride/2, width/2, lines/2, > origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); > + extendPicBorder(lowerResPlane[2], lumaStride/2, width/2, lines/2, > origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); > + extendPicBorder(lowerResPlane[3], lumaStride/2, width/2, lines/2, > origPic->m_lumaMarginX/2, origPic->m_lumaMarginY/2); > + fpelLowerResPlane[0] = lowerResPlane[0]; > + } > + > fpelPlane[0] = lowresPlane[0]; > } > diff -r 4f6dde51a5db -r 14a235657a20 source/common/lowres.h > --- a/source/common/lowres.h Fri Jul 05 10:47:15 2019 +0530 > +++ b/source/common/lowres.h Fri Jul 05 11:17:26 2019 +0530 > @@ -40,6 +40,10 @@ > pixel* lowresPlane[4]; > PicYuv* reconPic; > > + /* 1/16th resolution : Level-0 HME planes */ > + pixel* fpelLowerResPlane[3]; > + pixel* lowerResPlane[4]; > + > bool isWeighted; > bool isLowres; > > @@ -150,6 +154,7 @@ > struct Lowres : public ReferencePlanes > { > pixel *buffer[4]; > + pixel *lowerResBuffer[4]; // Level-0 buffer > > int frameNum; // Presentation frame number > int sliceType; // Slice type decided by lookahead > @@ -181,6 +186,9 @@ > uint32_t maxBlocksInRowFullRes; > uint32_t maxBlocksInColFullRes; > > + /* Hierarchical Motion Estimation */ > + bool bEnableHME; > + > /* used for vbvLookahead */ > int plannedType[X265_LOOKAHEAD_MAX + 1]; > int64_t plannedSatd[X265_LOOKAHEAD_MAX + 1]; > diff -r 4f6dde51a5db -r 14a235657a20 source/common/param.cpp > --- a/source/common/param.cpp Fri Jul 05 10:47:15 2019 +0530 > +++ b/source/common/param.cpp Fri Jul 05 11:17:26 2019 +0530 > @@ -201,6 +201,9 @@ > param->bEnableTSkipFast = 0; > param->maxNumReferences = 3; > param->bEnableTemporalMvp = 1; > + param->bEnableHME = 0; > + param->hmeSearchMethod[0] = X265_HEX_SEARCH; > + param->hmeSearchMethod[1] = param->hmeSearchMethod[2] = > X265_UMH_SEARCH; > param->bSourceReferenceEstimation = 0; > param->limitTU = 0; > param->dynamicRd = 0; > @@ -1282,6 +1285,27 @@ > OPT("fades") p->bEnableFades = 
atobool(value); > OPT("field") p->bField = atobool( value ); > OPT("cll") p->bEmitCLL = atobool(value); > + OPT("hme") p->bEnableHME = atobool(value); > + OPT("hme-search") > + { > + char search[3][5]; > + memset(search, '\0', 15 * sizeof(char)); > + if(3 == sscanf(value, "%d,%d,%d", &p->hmeSearchMethod[0], > &p->hmeSearchMethod[1], &p->hmeSearchMethod[2]) || > + 3 == sscanf(value, "%4[^,],%4[^,],%4[^,]", search[0], > search[1], search[2])) > + { > + if(search[0][0]) > + for(int level = 0; level < 3; level++) > + p->hmeSearchMethod[level] = > parseName(search[level], x265_motion_est_names, bError); > + } > + else if (sscanf(value, "%d", &p->hmeSearchMethod[0]) || > sscanf(value, "%s", search[0])) > + { > + if (search[0][0]) { > + p->hmeSearchMethod[0] = parseName(search[0], > x265_motion_est_names, bError); > + p->hmeSearchMethod[1] = p->hmeSearchMethod[2] = > p->hmeSearchMethod[0]; > + } > + } > + p->bEnableHME = true; > + } > else > return X265_PARAM_BAD_NAME; > } > @@ -1732,8 +1756,13 @@ > x265_log(param, X265_LOG_INFO, "Residual QT: max TU size, max depth : > %d / %d inter / %d intra\n", > param->maxTUSize, param->tuQTMaxInterDepth, > param->tuQTMaxIntraDepth); > > - x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge : > %s / %d / %d / %d\n", > - x265_motion_est_names[param->searchMethod], > param->searchRange, param->subpelRefine, param->maxNumMergeCand); > + if (param->bEnableHME) > + x265_log(param, X265_LOG_INFO, "HME L0,1,2 / range / subpel / > merge : %s, %s, %s / %d / %d / %d\n", > + x265_motion_est_names[param->hmeSearchMethod[0]], > x265_motion_est_names[param->hmeSearchMethod[1]], > x265_motion_est_names[param->hmeSearchMethod[2]], param->searchRange, > param->subpelRefine, param->maxNumMergeCand); > + else > + x265_log(param, X265_LOG_INFO, "ME / range / subpel / merge > : %s / %d / %d / %d\n", > + x265_motion_est_names[param->searchMethod], > param->searchRange, param->subpelRefine, param->maxNumMergeCand); > + > if (param->keyframeMax != 
INT_MAX || param->scenecutThreshold) > x265_log(param, X265_LOG_INFO, "Keyframe min / max / scenecut / > bias: %d / %d / %d / %.2lf\n", param->keyframeMin, param->keyframeMax, > param->scenecutThreshold, param->scenecutBias * 100); > else > @@ -1928,6 +1957,9 @@ > s += sprintf(s, " subme=%d", p->subpelRefine); > s += sprintf(s, " merange=%d", p->searchRange); > BOOL(p->bEnableTemporalMvp, "temporal-mvp"); > + BOOL(p->bEnableHME, "hme"); > + if (p->bEnableHME) > + s += sprintf(s, " Level 0,1,2=%d,%d,%d", p->hmeSearchMethod[0], > p->hmeSearchMethod[1], p->hmeSearchMethod[2]); > BOOL(p->bEnableWeightedPred, "weightp"); > BOOL(p->bEnableWeightedBiPred, "weightb"); > BOOL(p->bSourceReferenceEstimation, "analyze-src-pics"); > @@ -2215,6 +2247,12 @@ > dst->subpelRefine = src->subpelRefine; > dst->searchRange = src->searchRange; > dst->bEnableTemporalMvp = src->bEnableTemporalMvp; > + dst->bEnableHME = src->bEnableHME; > + if (src->bEnableHME) > + { > + for (int level = 0; level < 3; level++) > + dst->hmeSearchMethod[level] = src->hmeSearchMethod[level]; > + } > dst->bEnableWeightedBiPred = src->bEnableWeightedBiPred; > dst->bEnableWeightedPred = src->bEnableWeightedPred; > dst->bSourceReferenceEstimation = src->bSourceReferenceEstimation; > diff -r 4f6dde51a5db -r 14a235657a20 source/common/pixel.cpp > --- a/source/common/pixel.cpp Fri Jul 05 10:47:15 2019 +0530 > +++ b/source/common/pixel.cpp Fri Jul 05 11:17:26 2019 +0530 > @@ -1309,6 +1309,7 @@ > p.scale1D_128to64[NONALIGNED] = p.scale1D_128to64[ALIGNED] = > scale1D_128to64; > p.scale2D_64to32 = scale2D_64to32; > p.frameInitLowres = frame_init_lowres_core; > + p.frameInitLowerRes = frame_init_lowres_core; > p.ssim_4x4x2_core = ssim_4x4x2_core; > p.ssim_end_4 = ssim_end_4; > > diff -r 4f6dde51a5db -r 14a235657a20 source/common/primitives.h > --- a/source/common/primitives.h Fri Jul 05 10:47:15 2019 +0530 > +++ b/source/common/primitives.h Fri Jul 05 11:17:26 2019 +0530 > @@ -349,6 +349,7 @@ > saoCuStatsE3_t 
saoCuStatsE3; > > downscale_t frameInitLowres; > + downscale_t frameInitLowerRes; > cutree_propagate_cost propagateCost; > cutree_fix8_unpack fix8Unpack; > cutree_fix8_pack fix8Pack; > diff -r 4f6dde51a5db -r 14a235657a20 source/common/x86/asm-primitives.cpp > --- a/source/common/x86/asm-primitives.cpp Fri Jul 05 10:47:15 2019 > +0530 > +++ b/source/common/x86/asm-primitives.cpp Fri Jul 05 11:17:26 2019 > +0530 > @@ -1090,6 +1090,7 @@ > LUMA_VSS_FILTERS(sse2); > > p.frameInitLowres = PFX(frame_init_lowres_core_sse2); > + p.frameInitLowerRes = PFX(frame_init_lowres_core_sse2); > // TODO: the planecopy_sp is really planecopy_SC now, must be fix > it > //p.planecopy_sp = PFX(downShift_16_sse2); > p.planecopy_sp_shl = PFX(upShift_16_sse2); > @@ -1132,6 +1133,7 @@ > p.cu[BLOCK_8x8].idct = PFX(idct8_ssse3); > > p.frameInitLowres = PFX(frame_init_lowres_core_ssse3); > + p.frameInitLowerRes = PFX(frame_init_lowres_core_ssse3); > > ALL_LUMA_PU(convert_p2s[ALIGNED], filterPixelToShort, ssse3); > ALL_LUMA_PU(convert_p2s[NONALIGNED], filterPixelToShort, ssse3); > @@ -1453,6 +1455,7 @@ > p.cu[BLOCK_64x64].copy_sp = > (copy_sp_t)PFX(blockcopy_ss_64x64_avx); > > p.frameInitLowres = PFX(frame_init_lowres_core_avx); > + p.frameInitLowerRes = PFX(frame_init_lowres_core_avx); > > p.pu[LUMA_64x16].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x16_avx); > p.pu[LUMA_64x32].copy_pp = (copy_pp_t)PFX(blockcopy_ss_64x32_avx); > @@ -1469,6 +1472,7 @@ > #endif > LUMA_VAR(xop); > p.frameInitLowres = PFX(frame_init_lowres_core_xop); > + p.frameInitLowerRes = PFX(frame_init_lowres_core_xop); > } > if (cpuMask & X265_CPU_AVX2) > { > @@ -2296,6 +2300,7 @@ > p.chroma[X265_CSP_I444].pu[LUMA_64x64].filter_vsp = > PFX(interp_4tap_vert_sp_64x64_avx2); > > p.frameInitLowres = PFX(frame_init_lowres_core_avx2); > + p.frameInitLowerRes = PFX(frame_init_lowres_core_avx2); > p.propagateCost = PFX(mbtree_propagate_cost_avx2); > p.fix8Unpack = PFX(cutree_fix8_unpack_avx2); > p.fix8Pack = PFX(cutree_fix8_pack_avx2); 
> @@ -3294,6 +3299,7 @@ > > //p.frameInitLowres = PFX(frame_init_lowres_core_mmx2); > p.frameInitLowres = PFX(frame_init_lowres_core_sse2); > + p.frameInitLowerRes = PFX(frame_init_lowres_core_sse2); > > ALL_LUMA_TU(blockfill_s[NONALIGNED], blockfill_s, sse2); > ALL_LUMA_TU(blockfill_s[ALIGNED], blockfill_s, sse2); > @@ -3414,6 +3420,7 @@ > p.pu[LUMA_8x8].luma_hvpp = PFX(interp_8tap_hv_pp_8x8_ssse3); > > p.frameInitLowres = PFX(frame_init_lowres_core_ssse3); > + p.frameInitLowerRes = PFX(frame_init_lowres_core_ssse3); > ASSIGN2(p.scale1D_128to64, scale1D_128to64_ssse3); > p.scale2D_64to32 = PFX(scale2D_64to32_ssse3); > > @@ -3682,6 +3689,7 @@ > p.pu[LUMA_48x64].copy_pp = PFX(blockcopy_pp_48x64_avx); > > p.frameInitLowres = PFX(frame_init_lowres_core_avx); > + p.frameInitLowerRes = PFX(frame_init_lowres_core_avx); > p.propagateCost = PFX(mbtree_propagate_cost_avx); > } > if (cpuMask & X265_CPU_XOP) > @@ -3693,6 +3701,8 @@ > p.cu[BLOCK_8x8].sse_pp = PFX(pixel_ssd_8x8_xop); > p.cu[BLOCK_16x16].sse_pp = PFX(pixel_ssd_16x16_xop); > p.frameInitLowres = PFX(frame_init_lowres_core_xop); > + p.frameInitLowerRes = PFX(frame_init_lowres_core_xop); > + > } > #if X86_64 > if (cpuMask & X265_CPU_AVX2) > @@ -4667,6 +4677,7 @@ > p.chroma[X265_CSP_I444].pu[LUMA_64x16].filter_vpp = > PFX(interp_4tap_vert_pp_64x16_avx2); > > p.frameInitLowres = PFX(frame_init_lowres_core_avx2); > + p.frameInitLowerRes = PFX(frame_init_lowres_core_avx2); > p.propagateCost = PFX(mbtree_propagate_cost_avx2); > p.saoCuStatsE0 = PFX(saoCuStatsE0_avx2); > p.saoCuStatsE1 = PFX(saoCuStatsE1_avx2); > diff -r 4f6dde51a5db -r 14a235657a20 source/encoder/encoder.cpp > --- a/source/encoder/encoder.cpp Fri Jul 05 10:47:15 2019 +0530 > +++ b/source/encoder/encoder.cpp Fri Jul 05 11:17:26 2019 +0530 > @@ -3379,6 +3379,15 @@ > p->bRepeatHeaders = 1; > x265_log(p, X265_LOG_WARNING, "Turning on repeat - headers for > zone encoding\n"); > } > + > + if (m_param->bEnableHME) > + { > + if (m_param->sourceHeight < 540) > + 
{ > + x265_log(p, X265_LOG_WARNING, "Source height < 540p is too > low for HME. Disabling HME.\n"); > + p->bEnableHME = 0; > + } > + } > } > > void Encoder::readAnalysisFile(x265_analysis_data* analysis, int curPoc, > const x265_picture* picIn, int paramBytes) > diff -r 4f6dde51a5db -r 14a235657a20 source/test/regression-tests.txt > --- a/source/test/regression-tests.txt Fri Jul 05 10:47:15 2019 +0530 > +++ b/source/test/regression-tests.txt Fri Jul 05 11:17:26 2019 +0530 > @@ -153,6 +153,7 @@ > big_buck_bunny_360p24.y4m, --keyint 60 --min-keyint 40 --gop-lookahead 14 > BasketballDrive_1920x1080_50.y4m, --preset medium --no-open-gop --keyint > 50 --min-keyint 50 --radl 2 --vbv-maxrate 5000 --vbv-bufsize 5000 > big_buck_bunny_360p24.y4m, --bitrate 500 --fades > +720p50_parkrun_ter.y4m,--preset medium --bitrate 400 --hme > > # Main12 intraCost overflow bug test > 720p50_parkrun_ter.y4m,--preset medium > diff -r 4f6dde51a5db -r 14a235657a20 source/x265.h > --- a/source/x265.h Fri Jul 05 10:47:15 2019 +0530 > +++ b/source/x265.h Fri Jul 05 11:17:26 2019 +0530 > @@ -1172,6 +1172,14 @@ > /* Enable availability of temporal motion vector for AMVP, default is > enabled */ > int bEnableTemporalMvp; > > + /* Enable 3-level Hierarchical motion estimation at One-Sixteenth, > Quarter and Full resolution. > + * Default is disabled */ > + int bEnableHME; > + > + /* Enable HME search method (DIA, HEX, UMH, STAR, SEA, FULL) for > level 0, 1 and 2. > + * Default is hex, umh, umh for L0, L1 and L2 respectively. */ > + int hmeSearchMethod[3]; > + > /* Enable weighted prediction in P slices. 
This enables weighting > analysis > * in the lookahead, which influences slice decisions, and enables > weighting > * analysis in the main encoder which allows P reference samples to > have a > diff -r 4f6dde51a5db -r 14a235657a20 source/x265cli.h > --- a/source/x265cli.h Fri Jul 05 10:47:15 2019 +0530 > +++ b/source/x265cli.h Fri Jul 05 11:17:26 2019 +0530 > @@ -95,6 +95,9 @@ > { "max-merge", required_argument, NULL, 0 }, > { "no-temporal-mvp", no_argument, NULL, 0 }, > { "temporal-mvp", no_argument, NULL, 0 }, > + { "hme", no_argument, NULL, 0 }, > + { "no-hme", no_argument, NULL, 0 }, > + { "hme-search", required_argument, NULL, 0 }, > { "rdpenalty", required_argument, NULL, 0 }, > { "no-rect", no_argument, NULL, 0 }, > { "rect", no_argument, NULL, 0 }, > @@ -464,6 +467,8 @@ > H0(" --[no-]amp Enable asymmetric motion > partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP)); > H0(" --[no-]limit-modes Limit rectangular and asymmetric > motion predictions. Default %d\n", param->limitModes); > H1(" --[no-]temporal-mvp Enable temporal MV predictors. > Default %s\n", OPT(param->bEnableTemporalMvp)); > + H1(" --[no-]hme Enable Hierarchical Motion > Estimation. Default %s\n", OPT(param->bEnableHME)); > + H1(" --hme-search <string> Motion search-method for HME > L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], > param->hmeSearchMethod[1], param->hmeSearchMethod[2]); > H0("\nSpatial / intra options:\n"); > H0(" --[no-]strong-intra-smoothing Enable strong intra smoothing > for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing)); > H0(" --[no-]constrained-intra Constrained intra prediction > (use only intra coded reference pixels) Default %s\n", > OPT(param->bEnableConstrainedIntra)); > _______________________________________________ > x265-devel mailing list > [email protected] > https://mailman.videolan.org/listinfo/x265-devel > -- Regards, Aruna
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
