# HG changeset patch # User Santhoshini Sekar <santhosh...@multicorewareinc.com> # Date 1482321849 -19800 # Wed Dec 21 17:34:09 2016 +0530 # Node ID 9216f2375f1b26d96b6023f734be5f6bb8e888a0 # Parent 82e4e3b0bb460c0fe140953342e12a0a1b3da004 reuse analysis information from pass 1 to effectively reduce computation in pass 2
diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp +++ b/source/encoder/analysis.cpp @@ -146,6 +146,23 @@ m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0); uint32_t numPartition = ctu.m_numPartitions; + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead) + { + m_multipassAnalysis = (analysis2PassFrameData*)m_frame->m_analysis2Pass.analysisFramedata; + m_multipassDepth = &m_multipassAnalysis->depth[ctu.m_cuAddr * ctu.m_numPartitions]; + if (m_slice->m_sliceType != I_SLICE) + { + int numPredDir = m_slice->isInterP() ? 1 : 2; + for (int dir = 0; dir < numPredDir; dir++) + { + m_multipassMv[dir] = &m_multipassAnalysis->m_mv[dir][ctu.m_cuAddr * ctu.m_numPartitions]; + m_multipassMvpIdx[dir] = &m_multipassAnalysis->mvpIdx[dir][ctu.m_cuAddr * ctu.m_numPartitions]; + m_multipassRef[dir] = &m_multipassAnalysis->ref[dir][ctu.m_cuAddr * ctu.m_numPartitions]; + } + m_multipassModes = &m_multipassAnalysis->modes[ctu.m_cuAddr * ctu.m_numPartitions]; + } + } + if (m_param->analysisMode && m_slice->m_sliceType != I_SLICE) { int numPredDir = m_slice->isInterP() ? 1 : 2; @@ -1015,6 +1032,22 @@ } } } + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis) + { + if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx]) + { + if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP) + { + md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); + md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); + checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); + + skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode; + if (m_param->rdLevel) + skipModes = m_param->bEnableEarlySkip && md.bestMode; + } + } + } /* Step 1. Evaluate Merge/Skip candidates for likely early-outs, if skip mode was not set above */ if (mightNotSplit && depth >= minDepth && !md.bestMode) /* TODO: Re-evaluate if analysis load/save still works */ @@ -1562,6 +1595,28 @@ } } + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis) + { + if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx]) + { + if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP) + { + md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); + md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); + checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); + + skipModes = !!m_param->bEnableEarlySkip && md.bestMode; + refMasks[0] = allSplitRefs; + md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks); + checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth); + + if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode) + skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); + } + } + } + /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */ if (mightNotSplit && !md.bestMode) { @@ -2310,6 +2365,21 @@ bestME[i].ref = m_reuseRef[refOffset + index++]; } } + + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis) + { + uint32_t numPU = interMode.cu.getNumPartInter(0); + for (uint32_t part = 0; part < numPU; part++) + { + MotionData* bestME = interMode.bestME[part]; + for (int32_t i = 0; i < numPredDir; i++) + { + bestME[i].ref = m_multipassRef[i][cuGeom.absPartIdx]; + bestME[i].mv = m_multipassMv[i][cuGeom.absPartIdx]; + bestME[i].mvpIdx = m_multipassMvpIdx[i][cuGeom.absPartIdx]; + } + } + } predInterSearch(interMode, cuGeom, m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400), refMask); /* predInterSearch sets interMode.sa8dBits */ @@ -2359,6 +2429,22 @@ bestME[i].ref = m_reuseRef[refOffset + index++]; } } + + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis) + { + uint32_t numPU = interMode.cu.getNumPartInter(0); + for (uint32_t part = 0; part < numPU; part++) + { + MotionData* bestME = interMode.bestME[part]; + for (int32_t i = 0; i < numPredDir; i++) + { + bestME[i].ref = m_multipassRef[i][cuGeom.absPartIdx]; + bestME[i].mv = m_multipassMv[i][cuGeom.absPartIdx]; + bestME[i].mvpIdx = m_multipassMvpIdx[i][cuGeom.absPartIdx]; + } + } + } + predInterSearch(interMode, cuGeom, m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400, refMask); /* predInterSearch sets interMode.sa8dBits, but this is ignored */ diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h --- a/source/encoder/analysis.h +++ b/source/encoder/analysis.h @@ -130,6 +130,13 @@ uint32_t m_splitRefIdx[4]; uint64_t* cacheCost; + + analysis2PassFrameData* m_multipassAnalysis; + uint8_t* m_multipassDepth; + MV* m_multipassMv[2]; + int* m_multipassMvpIdx[2]; + int32_t* m_multipassRef[2]; + uint8_t* m_multipassModes; /* refine RD based on QP for rd-levels 5 and 6 */ void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, int32_t lqp); diff --git a/source/encoder/search.cpp b/source/encoder/search.cpp --- a/source/encoder/search.cpp +++ b/source/encoder/search.cpp @@ -2128,7 +2128,7 @@ cu.getNeighbourMV(puIdx, pu.puAbsPartIdx, interMode.interNeighbours); /* Uni-directional prediction */ - if (m_param->analysisMode == X265_ANALYSIS_LOAD) + if (m_param->analysisMode == X265_ANALYSIS_LOAD || (m_param->analysisMultiPassRefine && m_param->rc.bStatRead)) { for (int list = 0; list < numPredDir; list++) { @@ -2153,7 +2153,11 @@ m_me.integral[planes] = interMode.fencYuv->m_integral[list][ref][planes] + puX * pu.width + puY * pu.height * m_slice->m_refFrameList[list][ref]->m_reconPic->m_stride; } setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax); - int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv, + MV mvpIn = mvp; + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && mvpIdx == bestME[list].mvpIdx) + mvpIn = bestME[list].mv; + + int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvpIn, numMvc, mvc, m_param->searchRange, outmv, m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0); /* Get total cost of partition, but only include MV bit cost once */ @@ -2162,7 +2166,22 @@ uint32_t cost = (satdCost - mvCost) + m_rdCost.getCost(bits); /* Refine MVP selection, updates: mvpIdx, bits, cost */ - mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost); + if (!m_param->analysisMultiPassRefine) + mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost); + else + { + /* It is more accurate to compare with actual mvp that was used in motionestimate than amvp[mvpIdx]. Here + the actual mvp is bestME from pass 1 for that mvpIdx */ + int diffBits = m_me.bitcost(outmv, amvp[!mvpIdx]) - m_me.bitcost(outmv, mvpIn); + if (diffBits < 0) + { + mvpIdx = !mvpIdx; + uint32_t origOutBits = bits; + bits = origOutBits + diffBits; + cost = (cost - m_rdCost.getCost(origOutBits)) + m_rdCost.getCost(bits); + } + mvp = amvp[mvpIdx]; + } if (cost < bestME[list].cost) { diff --git a/source/test/rate-control-tests.txt b/source/test/rate-control-tests.txt --- a/source/test/rate-control-tests.txt +++ b/source/test/rate-control-tests.txt @@ -43,3 +43,9 @@ RaceHorses_416x240_30_10bit.yuv,--preset medium --crf 26 --vbv-maxrate 1000 --vbv-bufsize 1000 --pass 1,--preset fast --bitrate 1000 --vbv-maxrate 1000 --vbv-bufsize 700 --pass 3 -F4,--preset slow --bitrate 500 --vbv-maxrate 500 --vbv-bufsize 700 --pass 2 -F4 sita_1920x1080_30.yuv, --preset ultrafast --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 1 --vbv-bufsize 7000 --vbv-maxrate 5000, --preset ultrafast --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 2 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers sita_1920x1080_30.yuv, --preset medium --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 1 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers --multi-pass-opt-rps, --preset medium --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 2 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers --multi-pass-opt-rps + +# multi-pass rate control and analysis +ducks_take_off_1080p50.y4m,--bitrate 6000 --pass 1 --multi-pass-opt-analysis --hash 1 --ssim --psnr +ducks_take_off_1080p50.y4m,--bitrate 6000 --pass 2 --multi-pass-opt-analysis --hash 1 --ssim --psnr +big_buck_bunny_360p24.y4m,--preset veryslow --bitrate 600 --pass 1 --multi-pass-opt-analysis --hash 1 --ssim --psnr +big_buck_bunny_360p24.y4m,--preset veryslow --bitrate 600 --pass 2 --multi-pass-opt-analysis --hash 1 --ssim --psnr
# HG changeset patch # User Santhoshini Sekar <santhosh...@multicorewareinc.com> # Date 1482321849 -19800 # Wed Dec 21 17:34:09 2016 +0530 # Node ID 9216f2375f1b26d96b6023f734be5f6bb8e888a0 # Parent 82e4e3b0bb460c0fe140953342e12a0a1b3da004 reuse analysis information from pass 1 to effectively reduce computation in pass 2 diff --git a/source/encoder/analysis.cpp b/source/encoder/analysis.cpp --- a/source/encoder/analysis.cpp +++ b/source/encoder/analysis.cpp @@ -146,6 +146,23 @@ m_modeDepth[0].fencYuv.copyFromPicYuv(*m_frame->m_fencPic, ctu.m_cuAddr, 0); uint32_t numPartition = ctu.m_numPartitions; + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead) + { + m_multipassAnalysis = (analysis2PassFrameData*)m_frame->m_analysis2Pass.analysisFramedata; + m_multipassDepth = &m_multipassAnalysis->depth[ctu.m_cuAddr * ctu.m_numPartitions]; + if (m_slice->m_sliceType != I_SLICE) + { + int numPredDir = m_slice->isInterP() ? 1 : 2; + for (int dir = 0; dir < numPredDir; dir++) + { + m_multipassMv[dir] = &m_multipassAnalysis->m_mv[dir][ctu.m_cuAddr * ctu.m_numPartitions]; + m_multipassMvpIdx[dir] = &m_multipassAnalysis->mvpIdx[dir][ctu.m_cuAddr * ctu.m_numPartitions]; + m_multipassRef[dir] = &m_multipassAnalysis->ref[dir][ctu.m_cuAddr * ctu.m_numPartitions]; + } + m_multipassModes = &m_multipassAnalysis->modes[ctu.m_cuAddr * ctu.m_numPartitions]; + } + } + if (m_param->analysisMode && m_slice->m_sliceType != I_SLICE) { int numPredDir = m_slice->isInterP() ? 1 : 2; @@ -1015,6 +1032,22 @@ } } } + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis) + { + if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx]) + { + if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP) + { + md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); + md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); + checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); + + skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode; + if (m_param->rdLevel) + skipModes = m_param->bEnableEarlySkip && md.bestMode; + } + } + } /* Step 1. Evaluate Merge/Skip candidates for likely early-outs, if skip mode was not set above */ if (mightNotSplit && depth >= minDepth && !md.bestMode) /* TODO: Re-evaluate if analysis load/save still works */ @@ -1562,6 +1595,28 @@ } } + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis) + { + if (mightNotSplit && depth == m_multipassDepth[cuGeom.absPartIdx]) + { + if (m_multipassModes[cuGeom.absPartIdx] == MODE_SKIP) + { + md.pred[PRED_MERGE].cu.initSubCU(parentCTU, cuGeom, qp); + md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); + checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); + + skipModes = !!m_param->bEnableEarlySkip && md.bestMode; + refMasks[0] = allSplitRefs; + md.pred[PRED_2Nx2N].cu.initSubCU(parentCTU, cuGeom, qp); + checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks); + checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth); + + if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode) + skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); + } + } + } + /* Step 1. Evaluate Merge/Skip candidates for likely early-outs */ if (mightNotSplit && !md.bestMode) { @@ -2310,6 +2365,21 @@ bestME[i].ref = m_reuseRef[refOffset + index++]; } } + + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis) + { + uint32_t numPU = interMode.cu.getNumPartInter(0); + for (uint32_t part = 0; part < numPU; part++) + { + MotionData* bestME = interMode.bestME[part]; + for (int32_t i = 0; i < numPredDir; i++) + { + bestME[i].ref = m_multipassRef[i][cuGeom.absPartIdx]; + bestME[i].mv = m_multipassMv[i][cuGeom.absPartIdx]; + bestME[i].mvpIdx = m_multipassMvpIdx[i][cuGeom.absPartIdx]; + } + } + } predInterSearch(interMode, cuGeom, m_bChromaSa8d && (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400), refMask); /* predInterSearch sets interMode.sa8dBits */ @@ -2359,6 +2429,22 @@ bestME[i].ref = m_reuseRef[refOffset + index++]; } } + + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && m_multipassAnalysis) + { + uint32_t numPU = interMode.cu.getNumPartInter(0); + for (uint32_t part = 0; part < numPU; part++) + { + MotionData* bestME = interMode.bestME[part]; + for (int32_t i = 0; i < numPredDir; i++) + { + bestME[i].ref = m_multipassRef[i][cuGeom.absPartIdx]; + bestME[i].mv = m_multipassMv[i][cuGeom.absPartIdx]; + bestME[i].mvpIdx = m_multipassMvpIdx[i][cuGeom.absPartIdx]; + } + } + } + predInterSearch(interMode, cuGeom, m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400, refMask); /* predInterSearch sets interMode.sa8dBits, but this is ignored */ diff --git a/source/encoder/analysis.h b/source/encoder/analysis.h --- a/source/encoder/analysis.h +++ b/source/encoder/analysis.h @@ -130,6 +130,13 @@ uint32_t m_splitRefIdx[4]; uint64_t* cacheCost; + + analysis2PassFrameData* m_multipassAnalysis; + uint8_t* m_multipassDepth; + MV* m_multipassMv[2]; + int* m_multipassMvpIdx[2]; + int32_t* m_multipassRef[2]; + uint8_t* m_multipassModes; /* refine RD based on QP for rd-levels 5 and 6 */ void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, int32_t lqp); diff --git a/source/encoder/search.cpp b/source/encoder/search.cpp --- a/source/encoder/search.cpp +++ b/source/encoder/search.cpp @@ -2128,7 +2128,7 @@ cu.getNeighbourMV(puIdx, pu.puAbsPartIdx, interMode.interNeighbours); /* Uni-directional prediction */ - if (m_param->analysisMode == X265_ANALYSIS_LOAD) + if (m_param->analysisMode == X265_ANALYSIS_LOAD || (m_param->analysisMultiPassRefine && m_param->rc.bStatRead)) { for (int list = 0; list < numPredDir; list++) { @@ -2153,7 +2153,11 @@ m_me.integral[planes] = interMode.fencYuv->m_integral[list][ref][planes] + puX * pu.width + puY * pu.height * m_slice->m_refFrameList[list][ref]->m_reconPic->m_stride; } setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax); - int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv, + MV mvpIn = mvp; + if (m_param->analysisMultiPassRefine && m_param->rc.bStatRead && mvpIdx == bestME[list].mvpIdx) + mvpIn = bestME[list].mv; + + int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvpIn, numMvc, mvc, m_param->searchRange, outmv, m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0); /* Get total cost of partition, but only include MV bit cost once */ @@ -2162,7 +2166,22 @@ uint32_t cost = (satdCost - mvCost) + m_rdCost.getCost(bits); /* Refine MVP selection, updates: mvpIdx, bits, cost */ - mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost); + if (!m_param->analysisMultiPassRefine) + mvp = checkBestMVP(amvp, outmv, mvpIdx, bits, cost); + else + { + /* It is more accurate to compare with actual mvp that was used in motionestimate than amvp[mvpIdx]. Here + the actual mvp is bestME from pass 1 for that mvpIdx */ + int diffBits = m_me.bitcost(outmv, amvp[!mvpIdx]) - m_me.bitcost(outmv, mvpIn); + if (diffBits < 0) + { + mvpIdx = !mvpIdx; + uint32_t origOutBits = bits; + bits = origOutBits + diffBits; + cost = (cost - m_rdCost.getCost(origOutBits)) + m_rdCost.getCost(bits); + } + mvp = amvp[mvpIdx]; + } if (cost < bestME[list].cost) { diff --git a/source/test/rate-control-tests.txt b/source/test/rate-control-tests.txt --- a/source/test/rate-control-tests.txt +++ b/source/test/rate-control-tests.txt @@ -43,3 +43,9 @@ RaceHorses_416x240_30_10bit.yuv,--preset medium --crf 26 --vbv-maxrate 1000 --vbv-bufsize 1000 --pass 1,--preset fast --bitrate 1000 --vbv-maxrate 1000 --vbv-bufsize 700 --pass 3 -F4,--preset slow --bitrate 500 --vbv-maxrate 500 --vbv-bufsize 700 --pass 2 -F4 sita_1920x1080_30.yuv, --preset ultrafast --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 1 --vbv-bufsize 7000 --vbv-maxrate 5000, --preset ultrafast --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 2 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers sita_1920x1080_30.yuv, --preset medium --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 1 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers --multi-pass-opt-rps, --preset medium --crf 20 --no-cutree --keyint 50 --min-keyint 50 --no-open-gop --pass 2 --vbv-bufsize 7000 --vbv-maxrate 5000 --repeat-headers --multi-pass-opt-rps + +# multi-pass rate control and analysis +ducks_take_off_1080p50.y4m,--bitrate 6000 --pass 1 --multi-pass-opt-analysis --hash 1 --ssim --psnr +ducks_take_off_1080p50.y4m,--bitrate 6000 --pass 2 --multi-pass-opt-analysis --hash 1 --ssim --psnr +big_buck_bunny_360p24.y4m,--preset veryslow --bitrate 600 --pass 1 --multi-pass-opt-analysis --hash 1 --ssim --psnr +big_buck_bunny_360p24.y4m,--preset veryslow --bitrate 600 --pass 2 --multi-pass-opt-analysis --hash 1 --ssim --psnr
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel