On Mon, Oct 31, 2016 at 11:03 PM, chen <chenm...@163.com> wrote: > # HG changeset patch > # User Min Chen <min.c...@multicorewareinc.com> > # Date 1477935084 18000 > # Node ID 9be03f08789954f772a50f26485a9c96ca745497 > # Parent b08109b3701e9b86010c5a5ed0ad7b3d6a051911 > [slices] fix multi-slices output non-determination bug > --- > source/common/common.h | 2 +- > source/encoder/analysis.cpp | 8 +- > source/encoder/frameencoder.cpp | 15 ++--- > source/encoder/motion.cpp | 116 +++++++++++++++++++++++++++--- > -------- > source/encoder/sao.cpp | 7 ++ > source/encoder/search.cpp | 3 + > 6 files changed, 104 insertions(+), 47 deletions(-) > > diff -r b08109b3701e -r 9be03f087899 source/common/common.h > --- a/source/common/common.h Fri Oct 28 10:28:15 2016 +0800 > +++ b/source/common/common.h Mon Oct 31 12:31:24 2016 -0500 > @@ -176,7 +176,7 @@ > > #define X265_MIN(a, b) ((a) < (b) ? (a) : (b)) > #define X265_MAX(a, b) ((a) > (b) ? (a) : (b)) > -#define COPY1_IF_LT(x, y) if ((y) < (x)) (x) = (y); > +#define COPY1_IF_LT(x, y) {if ((y) < (x)) (x) = (y);} > #define COPY2_IF_LT(x, y, a, b) \ > if ((y) < (x)) \ > { \ > diff -r b08109b3701e -r 9be03f087899 source/encoder/analysis.cpp > --- a/source/encoder/analysis.cpp Fri Oct 28 10:28:15 2016 +0800 > +++ b/source/encoder/analysis.cpp Mon Oct 31 12:31:24 2016 -0500 > @@ -1942,12 +1942,12 @@ > if (m_param->maxSlices > 1) > { > // NOTE: First row in slice can't negative > - if ((candMvField[i][0].mv.y < m_sliceMinY) | > (candMvField[i][1].mv.y < m_sliceMinY)) > + if (X265_MIN(candMvField[i][0].mv.y, > candMvField[i][1].mv.y) < m_sliceMinY) > continue; > > // Last row in slice can't reference beyond bound since > it is another slice area > // TODO: we may beyond bound in future since these area > have a chance to finish because we use parallel slices. 
Necessary prepare > research on load balance > - if ((candMvField[i][0].mv.y > m_sliceMaxY) | > (candMvField[i][1].mv.y > m_sliceMaxY)) > + if (X265_MAX(candMvField[i][0].mv.y, > candMvField[i][1].mv.y) > m_sliceMaxY) > continue; > } > > @@ -2072,12 +2072,12 @@ > if (m_param->maxSlices > 1) > { > // NOTE: First row in slice can't negative > - if ((candMvField[i][0].mv.y < m_sliceMinY) | > (candMvField[i][1].mv.y < m_sliceMinY)) > + if (X265_MIN(candMvField[i][0].mv.y, > candMvField[i][1].mv.y) < m_sliceMinY) > continue; > > // Last row in slice can't reference beyond bound since > it is another slice area > // TODO: we may beyond bound in future since these area > have a chance to finish because we use parallel slices. Necessary prepare > research on load balance > - if ((candMvField[i][0].mv.y > m_sliceMaxY) | > (candMvField[i][1].mv.y > m_sliceMaxY)) > + if (X265_MAX(candMvField[i][0].mv.y, > candMvField[i][1].mv.y) > m_sliceMaxY) > continue; > } > > diff -r b08109b3701e -r 9be03f087899 source/encoder/frameencoder.cpp > --- a/source/encoder/frameencoder.cpp Fri Oct 28 10:28:15 2016 +0800 > +++ b/source/encoder/frameencoder.cpp Mon Oct 31 12:31:24 2016 -0500 > @@ -123,7 +123,7 @@ > int range = m_param->searchRange; /* fpel search */ > range += !!(m_param->searchMethod < 2); /* diamond/hex range check > lag */ > range += NTAPS_LUMA / 2; /* subpel filter half-length > */ > - range += 2 + MotionEstimate::hpelIterationCount(m_param->subpelRefine) > / 2; /* subpel refine steps */ > + range += 2 + (MotionEstimate::hpelIterationCount(m_param->subpelRefine) > + 1) / 2; /* subpel refine steps */ > m_refLagRows = /*(m_param->maxSlices > 1 ? 
1 : 0) +*/ 1 + ((range + > g_maxCUSize - 1) / g_maxCUSize); > > // NOTE: 2 times of numRows because both Encoder and Filter in same > queue > @@ -654,8 +654,7 @@ > const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] > - 1; > const uint32_t row = sliceStartRow + rowInSlice; > > - if (row >= m_numRows) > - break; > + X265_CHECK(row < m_numRows, "slices row fault was > detected"); > > if (row > sliceEndRow) > continue; > @@ -674,7 +673,7 @@ > refpic->m_reconRowFlag[ > rowIdx].waitForChange(0); > > if ((bUseWeightP || bUseWeightB) && > m_mref[l][ref].isWeighted) > - m_mref[l][ref].applyWeight(row + > m_refLagRows, m_numRows, sliceEndRow + 1, sliceId); > + m_mref[l][ref].applyWeight(rowIdx, > m_numRows, sliceEndRow, sliceId); > } > } > > @@ -714,7 +713,7 @@ > refpic->m_reconRowFlag[ > rowIdx].waitForChange(0); > > if ((bUseWeightP || bUseWeightB) && > m_mref[l][ref].isWeighted) > - m_mref[list][ref].applyWeight(i + > m_refLagRows, m_numRows, m_numRows, 0); > + m_mref[list][ref].applyWeight(rowIdx, > m_numRows, m_numRows, 0); > } > } > > @@ -1187,8 +1186,8 @@ > // TODO: specially case handle on first and last row > > // Initialize restrict on MV range in slices > - tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) + > 2 * 4; > - tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) * > (g_maxCUSize * 4) - 3 * 4); > + tld.analysis.m_sliceMinY = -(int16_t)(rowInSlice * g_maxCUSize * 4) + > 3 * 4; > + tld.analysis.m_sliceMaxY = (int16_t)((endRowInSlicePlus1 - 1 - row) * > (g_maxCUSize * 4) - 4 * 4); > > // Handle single row slice > if (tld.analysis.m_sliceMaxY < tld.analysis.m_sliceMinY) > @@ -1502,7 +1501,7 @@ > > ScopedLock self(curRow.lock); > if ((m_bAllRowsStop && intRow > m_vbvResetTriggerRow) || > - (!bFirstRowInSlice && ((curRow.completed < numCols - 1) || > (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed < > m_rows[row].completed + 2)) > + (!bFirstRowInSlice && ((curRow.completed < numCols - 1) || > 
(m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed < > curRow.completed + 2)) > { > curRow.active = false; > curRow.busy = false; > diff -r b08109b3701e -r 9be03f087899 source/encoder/motion.cpp > --- a/source/encoder/motion.cpp Fri Oct 28 10:28:15 2016 +0800 > +++ b/source/encoder/motion.cpp Mon Oct 31 12:31:24 2016 -0500 > @@ -278,10 +278,14 @@ > costs[1] += mvcost((omv + MV(m1x, m1y)) << 2); \ > costs[2] += mvcost((omv + MV(m2x, m2y)) << 2); \ > costs[3] += mvcost((omv + MV(m3x, m3y)) << 2); \ > - COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \ > - COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \ > - COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \ > - COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \ > + if ((g_maxSlices == 1) | ((omv.y + m0y >= mvmin.y) & (omv.y + m0y > <= mvmax.y))) \ > + COPY2_IF_LT(bcost, costs[0], bmv, omv + MV(m0x, m0y)); \ > + if ((g_maxSlices == 1) | ((omv.y + m1y >= mvmin.y) & (omv.y + m1y > <= mvmax.y))) \ > + COPY2_IF_LT(bcost, costs[1], bmv, omv + MV(m1x, m1y)); \ > + if ((g_maxSlices == 1) | ((omv.y + m2y >= mvmin.y) & (omv.y + m2y > <= mvmax.y))) \ > + COPY2_IF_LT(bcost, costs[2], bmv, omv + MV(m2x, m2y)); \ > + if ((g_maxSlices == 1) | ((omv.y + m3y >= mvmin.y) & (omv.y + m3y > <= mvmax.y))) \ > + COPY2_IF_LT(bcost, costs[3], bmv, omv + MV(m3x, m3y)); \ >
Why do you need the `(g_maxSlices == 1) | ` term in all of the if conditions above? Wouldn't it be safer to perform this range check in all cases, and not just when there are multiple slices per frame? } > > #define COST_MV_X4_DIR(m0x, m0y, m1x, m1y, m2x, m2y, m3x, m3y, costs) \ > @@ -659,8 +663,10 @@ > do > { > COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs); > - COPY1_IF_LT(bcost, (costs[0] << 4) + 1); > - COPY1_IF_LT(bcost, (costs[1] << 4) + 3); > + if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 > <= mvmax.y))) > + COPY1_IF_LT(bcost, (costs[0] << 4) + 1); > + if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 > <= mvmax.y))) > + COPY1_IF_LT(bcost, (costs[1] << 4) + 3); > COPY1_IF_LT(bcost, (costs[2] << 4) + 4); > COPY1_IF_LT(bcost, (costs[3] << 4) + 12); > if (!(bcost & 15)) > @@ -698,36 +704,57 @@ > /* equivalent to the above, but eliminates duplicate candidates */ > COST_MV_X3_DIR(-2, 0, -1, 2, 1, 2, costs); > bcost <<= 3; > - COPY1_IF_LT(bcost, (costs[0] << 3) + 2); > - COPY1_IF_LT(bcost, (costs[1] << 3) + 3); > - COPY1_IF_LT(bcost, (costs[2] << 3) + 4); > + if ((g_maxSlices == 1) | ((bmv.y >= mvmin.y) & (bmv.y <= > mvmax.y))) > + COPY1_IF_LT(bcost, (costs[0] << 3) + 2); > + if ((g_maxSlices == 1) | ((bmv.y + 2 >= mvmin.y) & (bmv.y + 2 <= > mvmax.y))) > + { > + COPY1_IF_LT(bcost, (costs[1] << 3) + 3); > + COPY1_IF_LT(bcost, (costs[2] << 3) + 4); > + } > + > COST_MV_X3_DIR(2, 0, 1, -2, -1, -2, costs); > - COPY1_IF_LT(bcost, (costs[0] << 3) + 5); > - COPY1_IF_LT(bcost, (costs[1] << 3) + 6); > - COPY1_IF_LT(bcost, (costs[2] << 3) + 7); > + if ((g_maxSlices == 1) | ((bmv.y >= mvmin.y) & (bmv.y <= > mvmax.y))) > + COPY1_IF_LT(bcost, (costs[0] << 3) + 5); > + if ((g_maxSlices == 1) | ((bmv.y - 2 >= mvmin.y) & (bmv.y - 2 <= > mvmax.y))) > + { > + COPY1_IF_LT(bcost, (costs[1] << 3) + 6); > + COPY1_IF_LT(bcost, (costs[2] << 3) + 7); > + } > > if (bcost & 7) > { > int dir = (bcost & 7) - 2; > - bmv += hex2[dir + 1]; > > - /* half hexagon, not overlapping the previous iteration 
*/ > - for (int i = (merange >> 1) - 1; i > 0 && > bmv.checkRange(mvmin, mvmax); i--) > + if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >= > mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y))) > { > - COST_MV_X3_DIR(hex2[dir + 0].x, hex2[dir + 0].y, > - hex2[dir + 1].x, hex2[dir + 1].y, > - hex2[dir + 2].x, hex2[dir + 2].y, > - costs); > - bcost &= ~7; > - COPY1_IF_LT(bcost, (costs[0] << 3) + 1); > - COPY1_IF_LT(bcost, (costs[1] << 3) + 2); > - COPY1_IF_LT(bcost, (costs[2] << 3) + 3); > - if (!(bcost & 7)) > - break; > - dir += (bcost & 7) - 2; > - dir = mod6m1[dir + 1]; > bmv += hex2[dir + 1]; > - } > + > + /* half hexagon, not overlapping the previous iteration */ > + for (int i = (merange >> 1) - 1; i > 0 && > bmv.checkRange(mvmin, mvmax); i--) > + { > + COST_MV_X3_DIR(hex2[dir + 0].x, hex2[dir + 0].y, > + hex2[dir + 1].x, hex2[dir + 1].y, > + hex2[dir + 2].x, hex2[dir + 2].y, > + costs); > + bcost &= ~7; > + > + if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 0].y >= > mvmin.y) & (bmv.y + hex2[dir + 0].y <= mvmax.y))) > + COPY1_IF_LT(bcost, (costs[0] << 3) + 1); > + > + if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >= > mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y))) > + COPY1_IF_LT(bcost, (costs[1] << 3) + 2); > + > + if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 2].y >= > mvmin.y) & (bmv.y + hex2[dir + 2].y <= mvmax.y))) > + COPY1_IF_LT(bcost, (costs[2] << 3) + 3); > + > + if (!(bcost & 7)) > + break; > + > + dir += (bcost & 7) - 2; > + dir = mod6m1[dir + 1]; > + bmv += hex2[dir + 1]; > + } > + } // if ((g_maxSlices == 1) | ((bmv.y + hex2[dir + 1].y >= > mvmin.y) & (bmv.y + hex2[dir + 1].y <= mvmax.y))) > } > bcost >>= 3; > #endif // if 0 > @@ -735,15 +762,21 @@ > /* square refine */ > int dir = 0; > COST_MV_X4_DIR(0, -1, 0, 1, -1, 0, 1, 0, costs); > - COPY2_IF_LT(bcost, costs[0], dir, 1); > - COPY2_IF_LT(bcost, costs[1], dir, 2); > + if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= > mvmax.y))) > + COPY2_IF_LT(bcost, costs[0], dir, 
1); > + if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= > mvmax.y))) > + COPY2_IF_LT(bcost, costs[1], dir, 2); > COPY2_IF_LT(bcost, costs[2], dir, 3); > COPY2_IF_LT(bcost, costs[3], dir, 4); > COST_MV_X4_DIR(-1, -1, -1, 1, 1, -1, 1, 1, costs); > - COPY2_IF_LT(bcost, costs[0], dir, 5); > - COPY2_IF_LT(bcost, costs[1], dir, 6); > - COPY2_IF_LT(bcost, costs[2], dir, 7); > - COPY2_IF_LT(bcost, costs[3], dir, 8); > + if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= > mvmax.y))) > + COPY2_IF_LT(bcost, costs[0], dir, 5); > + if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= > mvmax.y))) > + COPY2_IF_LT(bcost, costs[1], dir, 6); > + if ((g_maxSlices == 1) | ((bmv.y - 1 >= mvmin.y) & (bmv.y - 1 <= > mvmax.y))) > + COPY2_IF_LT(bcost, costs[2], dir, 7); > + if ((g_maxSlices == 1) | ((bmv.y + 1 >= mvmin.y) & (bmv.y + 1 <= > mvmax.y))) > + COPY2_IF_LT(bcost, costs[3], dir, 8); > bmv += square1[dir]; > break; > } > @@ -756,6 +789,7 @@ > /* refine predictors */ > omv = bmv; > ucost1 = bcost; > + X265_CHECK((g_maxSlices == 1) | ((pmv.y >= mvmin.y) & (pmv.y <= > mvmax.y)), "pmv outside of search range!"); > DIA1_ITER(pmv.x, pmv.y); > if (pmv.notZero()) > DIA1_ITER(0, 0); > @@ -1099,6 +1133,7 @@ > if ((g_maxSlices > 1) & ((bmv.y < qmvmin.y) | (bmv.y > qmvmax.y))) > { > bmv.y = x265_min(x265_max(bmv.y, qmvmin.y), qmvmax.y); > + bcost = subpelCompare(ref, bmv, satd) + mvcost(bmv); > } > > if (!bcost) > @@ -1113,6 +1148,11 @@ > for (int i = 1; i <= wl.hpel_dirs; i++) > { > MV qmv = bmv + square1[i] * 2; > + > + /* skip invalid range */ > + if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y > > qmvmax.y))) > + continue; > + > int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, sad) + > mvcost(qmv); > COPY2_IF_LT(bcost, cost, bdir, i); > } > @@ -1124,6 +1164,11 @@ > for (int i = 1; i <= wl.qpel_dirs; i++) > { > MV qmv = bmv + square1[i]; > + > + /* skip invalid range */ > + if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y > > 
qmvmax.y))) > + continue; > + > int cost = ref->lowresQPelCost(fenc, blockOffset, qmv, satd) > + mvcost(qmv); > COPY2_IF_LT(bcost, cost, bdir, i); > } > @@ -1189,6 +1234,9 @@ > } > } > > + // check mv range for slice bound > + X265_CHECK((g_maxSlices == 1) | ((bmv.y >= qmvmin.y) & (bmv.y <= > qmvmax.y)), "mv beyond range!"); > + > x265_emms(); > outQMv = bmv; > return bcost; > diff -r b08109b3701e -r 9be03f087899 source/encoder/sao.cpp > --- a/source/encoder/sao.cpp Fri Oct 28 10:28:15 2016 +0800 > +++ b/source/encoder/sao.cpp Mon Oct 31 12:31:24 2016 -0500 > @@ -1206,12 +1206,19 @@ > void SAO::rdoSaoUnitRowEnd(const SAOParam* saoParam, int numctus) > { > if (!saoParam->bSaoFlag[0]) > + { > m_depthSaoRate[0 * SAO_DEPTHRATE_SIZE + m_refDepth] = 1.0; > + } > else > + { > + assert(m_numNoSao[0] <= numctus); > m_depthSaoRate[0 * SAO_DEPTHRATE_SIZE + m_refDepth] = > m_numNoSao[0] / ((double)numctus); > + } > > if (!saoParam->bSaoFlag[1]) > + { > m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] = 1.0; > + } > else > m_depthSaoRate[1 * SAO_DEPTHRATE_SIZE + m_refDepth] = > m_numNoSao[1] / ((double)numctus); > } > diff -r b08109b3701e -r 9be03f087899 source/encoder/search.cpp > --- a/source/encoder/search.cpp Fri Oct 28 10:28:15 2016 +0800 > +++ b/source/encoder/search.cpp Mon Oct 31 12:31:24 2016 -0500 > @@ -2545,6 +2545,9 @@ > /* conditional clipping for frame parallelism */ > mvmin.y = X265_MIN(mvmin.y, (int16_t)m_refLagPixels); > mvmax.y = X265_MIN(mvmax.y, (int16_t)m_refLagPixels); > + > + /* conditional clipping for negative mv range */ > + mvmax.y = X265_MAX(mvmax.y, mvmin.y); > } > > /* Note: this function overwrites the RD cost variables of interMode, but > leaves the sa8d cost unharmed */ > > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel