>From 0d1748cbf58e83b6357e4ff3c9696687e3c30ddd Mon Sep 17 00:00:00 2001 From: ashok2022 <[email protected]> Date: Thu, 13 Oct 2022 20:22:07 +0530 Subject: [PATCH] Implement ASM for SSD used for motion estimation
--- source/common/temporalfilter.cpp | 47 +++++++++++++++++++++++++------- source/common/temporalfilter.h | 31 ++++++++++++++++----- source/encoder/frameencoder.cpp | 2 ++ source/encoder/motion.cpp | 25 +++++++++++++++++ source/encoder/motion.h | 2 +- 5 files changed, 89 insertions(+), 18 deletions(-) diff --git a/source/common/temporalfilter.cpp b/source/common/temporalfilter.cpp index 1d5a7d076..a937e2a67 100644 --- a/source/common/temporalfilter.cpp +++ b/source/common/temporalfilter.cpp @@ -1,6 +1,8 @@ /***************************************************************************** * Copyright (C) 2013-2021 MulticoreWare, Inc * + * Authors: Ashok Kumar Mishra <[email protected]> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -18,8 +20,9 @@ * This program is also available under a commercial proprietary license. * For more information, contact us at license @ x265.com. *****************************************************************************/ - +#include "common.h" #include "temporalfilter.h" +#include "primitives.h" #include "frame.h" #include "slice.h" @@ -160,6 +163,10 @@ void TemporalFilter::init(const x265_param* param) m_sourceHeight = param->sourceHeight; m_internalCsp = param->internalCsp; m_numComponents = (m_internalCsp != X265_CSP_I400) ? 
MAX_NUM_COMPONENT : 1; + + m_metld = new MotionEstimatorTLD; + + predPUYuv.create(FENC_STRIDE, X265_CSP_I400); } int TemporalFilter::createRefPicInfo(MCTFReferencePicInfo* refFrame, x265_param* param) @@ -206,21 +213,33 @@ int TemporalFilter::motionErrorLuma( { dx /= s_motionVectorFactor; dy /= s_motionVectorFactor; + + const pixel* bufferRowStart = buffOrigin + (y + dy) * buffStride + (x + dx); +#if 0 + const pixel* origRowStart = origOrigin + y *origStride + x; + for (int y1 = 0; y1 < bs; y1++) { - const pixel* origRowStart = origOrigin + (y + y1)*origStride + x; - const pixel* bufferRowStart = buffOrigin + (y + y1 + dy)*buffStride + (x + dx); - for (int x1 = 0; x1 < bs; x1 += 2) + for (int x1 = 0; x1 < bs; x1++) { int diff = origRowStart[x1] - bufferRowStart[x1]; error += diff * diff; - diff = origRowStart[x1 + 1] - bufferRowStart[x1 + 1]; - error += diff * diff; - } - if (error > besterror) - { - return error; } + + origRowStart += origStride; + bufferRowStart += buffStride; + } +#else + int partEnum = partitionFromSizes(bs, bs); + /* copy PU block into cache */ + primitives.pu[partEnum].copy_pp(predPUYuv.m_buf[0], FENC_STRIDE, bufferRowStart, buffStride); + + error = primitives.cu[partEnum].sse_pp(m_metld->me.fencPUYuv.m_buf[0], FENC_STRIDE, predPUYuv.m_buf[0], FENC_STRIDE); + +#endif + if (error > besterror) + { + return error; } } else @@ -761,6 +780,10 @@ void TemporalFilter::motionEstimationLuma(MV *mvs, uint32_t mvStride, PicYuv *or { for (int blockX = 0; blockX + blockSize <= origWidth; blockX += stepSize) { + const intptr_t pelOffset = blockY * orig->m_stride + blockX; + m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride, pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1); + + MV best(0, 0); int leastError = INT_MAX; @@ -889,6 +912,10 @@ void TemporalFilter::motionEstimationLumaDoubleRes(MV *mvs, uint32_t mvStride, P { for (int blockX = 0; blockX + blockSize <= origWidth; blockX += stepSize) { + + const intptr_t pelOffset = blockY * 
orig->m_stride + blockX; + m_metld->me.setSourcePU(orig->m_picOrg[0], orig->m_stride, pelOffset, blockSize, blockSize, X265_HEX_SEARCH, 1); + MV best(0, 0); int leastError = INT_MAX; diff --git a/source/common/temporalfilter.h b/source/common/temporalfilter.h index 003630994..801359914 100644 --- a/source/common/temporalfilter.h +++ b/source/common/temporalfilter.h @@ -29,6 +29,7 @@ #include <deque> #include "piclist.h" #include "yuv.h" +#include "motion.h" using namespace X265_NS; @@ -94,6 +95,19 @@ struct TemporalFilterRefPicInfo int origOffset; }; +struct MotionEstimatorTLD +{ + MotionEstimate me; + + MotionEstimatorTLD() + { + me.init(X265_CSP_I400); + me.setQP(X265_LOOKAHEAD_QP); + } + + ~MotionEstimatorTLD() {} +}; + struct MCTFReferencePicInfo { PicYuv* picBuffer; @@ -103,16 +117,16 @@ struct MCTFReferencePicInfo MV* mvs0; MV* mvs1; MV* mvs2; - uint32_t mvsStride; - uint32_t mvsStride0; - uint32_t mvsStride1; - uint32_t mvsStride2; - int* error; - int* noise; + uint32_t mvsStride; + uint32_t mvsStride0; + uint32_t mvsStride1; + uint32_t mvsStride2; + int* error; + int* noise; int16_t origOffset; bool isFilteredFrame; - PicYuv* compensatedPic; + PicYuv* compensatedPic; int* isSubsampled; @@ -154,6 +168,9 @@ public: int m_numComponents; uint8_t m_sliceTypeConfig; + MotionEstimatorTLD* m_metld; + Yuv predPUYuv; + void subsampleLuma(PicYuv *input, PicYuv *output, int factor = 2); int createRefPicInfo(MCTFReferencePicInfo* refFrame, x265_param* param); diff --git a/source/encoder/frameencoder.cpp b/source/encoder/frameencoder.cpp index 0a44eb22f..ec78fc9f2 100644 --- a/source/encoder/frameencoder.cpp +++ b/source/encoder/frameencoder.cpp @@ -105,6 +105,8 @@ void FrameEncoder::destroy() if (m_param->bEnableGopBasedTemporalFilter) { + delete m_frameEncTF->m_metld; + for (int i = 0; i < (m_frameEncTF->s_range << 1); i++) m_frameEncTF->destroyRefPicInfo(&m_mcstfRefList[i]); diff --git a/source/encoder/motion.cpp b/source/encoder/motion.cpp index f10db884e..2bb613ec0 
100644 --- a/source/encoder/motion.cpp +++ b/source/encoder/motion.cpp @@ -190,6 +190,31 @@ void MotionEstimate::setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, X265_CHECK(!bChromaSATD, "chroma distortion measurements impossible in this code path\n"); } +/* Called by lookahead, luma only, no use of PicYuv */ +void MotionEstimate::setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int pwidth, int pheight, const int method, const int refine) +{ + partEnum = partitionFromSizes(pwidth, pheight); + X265_CHECK(LUMA_4x4 != partEnum, "4x4 inter partition detected!\n"); + sad = primitives.pu[partEnum].sad; + ads = primitives.pu[partEnum].ads; + satd = primitives.pu[partEnum].satd; + sad_x3 = primitives.pu[partEnum].sad_x3; + sad_x4 = primitives.pu[partEnum].sad_x4; + + + blockwidth = pwidth; + blockOffset = offset; + absPartIdx = ctuAddr = -1; + + /* Search params */ + searchMethod = method; + subpelRefine = refine; + + /* copy PU block into cache */ + primitives.pu[partEnum].copy_pp(fencPUYuv.m_buf[0], FENC_STRIDE, fencY + offset, stride); + X265_CHECK(!bChromaSATD, "chroma distortion measurements impossible in this code path\n"); +} + /* Called by Search::predInterSearch() or --pme equivalent, chroma residual might be considered */ void MotionEstimate::setSourcePU(const Yuv& srcFencYuv, int _ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight, const int method, const int refine, bool bChroma) { diff --git a/source/encoder/motion.h b/source/encoder/motion.h index d306230b4..790bc5fb4 100644 --- a/source/encoder/motion.h +++ b/source/encoder/motion.h @@ -77,7 +77,7 @@ public: void init(int csp); /* Methods called at slice setup */ - + void setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int pwidth, int pheight, const int searchMethod, const int subpelRefine); void setSourcePU(pixel *fencY, intptr_t stride, intptr_t offset, int pwidth, int pheight, const int searchMethod, const int searchL0, const int searchL1, const int 
subpelRefine); void setSourcePU(const Yuv& srcFencYuv, int ctuAddr, int cuPartIdx, int puPartIdx, int pwidth, int pheight, const int searchMethod, const int subpelRefine, bool bChroma); -- 2.34.1.windows.1 *Thanks and Regards,* *Snehaa.G Video Codec Engineer, Media & AI analytics <https://multicorewareinc.com/>*
mcstf_patch_10.diff
Description: Binary data
_______________________________________________ x265-devel mailing list [email protected] https://mailman.videolan.org/listinfo/x265-devel
