On 02/16, santhosh...@multicorewareinc.com wrote: > # HG changeset patch > # User Santhoshini Sekar<santhosh...@multicorewareinc.com> > # Date 1424077388 -19800 > # Mon Feb 16 14:33:08 2015 +0530 > # Node ID 24e3fc60cb4183fff50921a332f0fd0d98cc0f56 > # Parent 59466f1455ce52920eb642cf06db4b4de8d8ff10 > implementation for minimum CU size > > diff -r 59466f1455ce -r 24e3fc60cb41 source/common/cudata.cpp > --- a/source/common/cudata.cpp Mon Feb 16 14:28:19 2015 +0530 > +++ b/source/common/cudata.cpp Mon Feb 16 14:33:08 2015 +0530 > @@ -2066,14 +2066,14 @@ > > #define CU_SET_FLAG(bitfield, flag, value) (bitfield) = ((bitfield) & > (~(flag))) | ((~((value) - 1)) & (flag)) > > -void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t > maxCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) > +void CUData::calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t > maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]) > { > // Initialize the coding blocks inside the CTB > - for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; > log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--) > + for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; > log2CUSize >= g_log2Size[minCUSize]; log2CUSize--) > { > uint32_t blockSize = 1 << log2CUSize; > uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize); > - int32_t lastLevelFlag = log2CUSize == MIN_LOG2_CU_SIZE; > + int32_t lastLevelFlag = log2CUSize == g_log2Size[minCUSize]; > for (uint32_t sbY = 0; sbY < sbWidth; sbY++) > { > for (uint32_t sbX = 0; sbX < sbWidth; sbX++) > diff -r 59466f1455ce -r 24e3fc60cb41 source/common/cudata.h > --- a/source/common/cudata.h Mon Feb 16 14:28:19 2015 +0530 > +++ b/source/common/cudata.h Mon Feb 16 14:33:08 2015 +0530 > @@ -158,7 +158,7 @@ > CUData(); > > void initialize(const CUDataMemPool& dataPool, uint32_t depth, int > csp, int instance); > - static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t > maxCUSize, CUGeom 
cuDataArray[CUGeom::MAX_GEOMS]); > + static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t > maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]); > > void initCTU(const Frame& frame, uint32_t cuAddr, int qp); > void initSubCU(const CUData& ctu, const CUGeom& cuGeom); > diff -r 59466f1455ce -r 24e3fc60cb41 source/common/param.cpp > --- a/source/common/param.cpp Mon Feb 16 14:28:19 2015 +0530 > +++ b/source/common/param.cpp Mon Feb 16 14:33:08 2015 +0530 > @@ -127,6 +127,7 @@ > > /* CU definitions */ > param->maxCUSize = 64; > + param->minCUSize = 8; > param->tuQTMaxInterDepth = 1; > param->tuQTMaxIntraDepth = 1; > param->maxTUSize = 32; > @@ -978,6 +979,8 @@ > "x265 was compiled for 8bit encodes, only 8bit internal depth > supported"); > #endif > > + CHECK(param->minCUSize != 64 && param->minCUSize != 32 && > param->minCUSize != 16 && param->minCUSize != 8, > + "minimim CU size must be 8, 16, 32, or 64"); > CHECK(param->rc.qp < -6 * (param->internalBitDepth - 8) || param->rc.qp > > QP_MAX_SPEC, > "QP exceeds supported range (-QpBDOffsety to 51)"); > CHECK(param->fpsNum == 0 || param->fpsDenom == 0, > @@ -1166,14 +1169,20 @@ > } > else > { > + if (param->minCUSize > param->maxCUSize) > + { > + x265_log(param, X265_LOG_WARNING, "Min CU size should be less than > or equal to max CU size, setting min CU size = %d\n", param->maxCUSize); > + param->minCUSize = param->maxCUSize; > + }
The whitespace here is broken. > uint32_t maxLog2CUSize = (uint32_t)g_log2Size[param->maxCUSize]; > + uint32_t minLog2CUSize = (uint32_t)g_log2Size[param->minCUSize]; > > // set max CU width & height > g_maxCUSize = param->maxCUSize; > g_maxLog2CUSize = maxLog2CUSize; > > // compute actual CU depth with respect to config depth and max > transform size > - g_maxCUDepth = maxLog2CUSize - MIN_LOG2_CU_SIZE; > + g_maxCUDepth = maxLog2CUSize - minLog2CUSize; > g_unitSizeDepth = maxLog2CUSize - LOG2_UNIT_SIZE; If g_maxCUDepth is going to be based on minLog2CUSize, then we must validate this in x265_set_globals(). We have a design decision to make here. We could leave g_maxCUDepth unmodified here and always signal the min CU size as 8x8 but voluntarily never split 16x16 (or 32x32) CUs. We will have to signal no-splits at the lowest remaining level since they will not be implied; this is a compression cost. Or we can signal the intended min CU size and enforce this same limit for all encodes in the same process: a loss of flexibility but a slight gain in compression efficiency. I'd love to hear dissenting opinions, but based on what I know, most use-cases that use multiple encodes in a single process will use the same base preset for all of them. In that case, it is better to err on the side of compression efficiency. 
> // initialize partition order > @@ -1195,7 +1204,7 @@ > if (param->interlaceMode) > x265_log(param, X265_LOG_INFO, "Interlaced field inputs > : %s\n", x265_interlace_names[param->interlaceMode]); > > - x265_log(param, X265_LOG_INFO, "Coding QT: max CU size, min CU size : %d > / %d\n", param->maxCUSize, 8); > + x265_log(param, X265_LOG_INFO, "Coding QT: max CU size, min CU size : %d > / %d\n", param->maxCUSize, param->minCUSize); > > x265_log(param, X265_LOG_INFO, "Residual QT: max TU size, max depth : %d > / %d inter / %d intra\n", > param->maxTUSize, param->tuQTMaxInterDepth, > param->tuQTMaxIntraDepth); > diff -r 59466f1455ce -r 24e3fc60cb41 source/encoder/analysis.cpp > --- a/source/encoder/analysis.cpp Mon Feb 16 14:28:19 2015 +0530 > +++ b/source/encoder/analysis.cpp Mon Feb 16 14:33:08 2015 +0530 > @@ -260,7 +260,7 @@ > checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, NULL); > checkBestMode(md.pred[PRED_INTRA], depth); > > - if (depth == g_maxCUDepth) > + if (cuGeom.log2CUSize == 3 && m_slice->m_sps->quadtreeTULog2MinSize > < 3) > { > md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom); > checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, NULL, > NULL); > @@ -472,7 +472,7 @@ > { > case 0: > slave->checkIntra(md.pred[PRED_INTRA], *m_curGeom, SIZE_2Nx2N, > NULL, NULL); > - if (m_curGeom->depth == g_maxCUDepth && m_curGeom->log2CUSize > > m_slice->m_sps->quadtreeTULog2MinSize) > + if (m_curGeom->log2CUSize == 3 && > m_slice->m_sps->quadtreeTULog2MinSize < 3) > slave->checkIntra(md.pred[PRED_INTRA_NxN], *m_curGeom, > SIZE_NxN, NULL, NULL); > break; > > @@ -556,7 +556,7 @@ > if (bTryIntra) > { > md.pred[PRED_INTRA].cu.initSubCU(parentCTU, cuGeom); > - if (depth == g_maxCUDepth && cuGeom.log2CUSize > > m_slice->m_sps->quadtreeTULog2MinSize) > + if (cuGeom.log2CUSize == 3 && > m_slice->m_sps->quadtreeTULog2MinSize < 3) > md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom); > } > > @@ -704,7 +704,7 @@ > if (bTryIntra) > { > 
checkBestMode(md.pred[PRED_INTRA], depth); > - if (depth == g_maxCUDepth && cuGeom.log2CUSize > > m_slice->m_sps->quadtreeTULog2MinSize) > + if (cuGeom.log2CUSize == 3 && > m_slice->m_sps->quadtreeTULog2MinSize < 3) > checkBestMode(md.pred[PRED_INTRA_NxN], depth); > } > } > @@ -1187,7 +1187,7 @@ > checkIntra(md.pred[PRED_INTRA], cuGeom, SIZE_2Nx2N, NULL, > NULL); > checkBestMode(md.pred[PRED_INTRA], depth); > > - if (depth == g_maxCUDepth && cuGeom.log2CUSize > > m_slice->m_sps->quadtreeTULog2MinSize) > + if (cuGeom.log2CUSize == 3 && > m_slice->m_sps->quadtreeTULog2MinSize < 3) > { > md.pred[PRED_INTRA_NxN].cu.initSubCU(parentCTU, cuGeom); > checkIntra(md.pred[PRED_INTRA_NxN], cuGeom, SIZE_NxN, > NULL, NULL); > diff -r 59466f1455ce -r 24e3fc60cb41 source/encoder/encoder.cpp > --- a/source/encoder/encoder.cpp Mon Feb 16 14:28:19 2015 +0530 > +++ b/source/encoder/encoder.cpp Mon Feb 16 14:33:08 2015 +0530 > @@ -961,7 +961,7 @@ > finalLog.cuInterDistribution[depth][m] += > enclog.cuInterDistribution[depth][m]; > } > > - if (depth == g_maxCUDepth) > + if (m_frameEncoder[i]->m_cuGeoms->log2CUSize == 3 && > m_sps.quadtreeTULog2MinSize < 3) > finalLog.cntIntraNxN += enclog.cntIntraNxN; > if (sliceType != I_SLICE) > { > @@ -1054,14 +1054,14 @@ > cuIntraDistribution[1], > cuIntraDistribution[2]); > if (sliceType != I_SLICE) > { > - if (depth == g_maxCUDepth) > + if (g_log2Size[cuSize] == 3 && > m_sps.quadtreeTULog2MinSize < 3) > len += sprintf(stats + len, " %dx%d "X265_LL "%%", > cuSize / 2, cuSize / 2, cntIntraNxN); > } > > len += sprintf(stats + len, ")"); > if (sliceType == I_SLICE) > { > - if (depth == g_maxCUDepth) > + if (g_log2Size[cuSize] == 3 && > m_sps.quadtreeTULog2MinSize < 3) > len += sprintf(stats + len, " %dx%d: "X265_LL "%%", > cuSize / 2, cuSize / 2, cntIntraNxN); > } > } > @@ -1708,10 +1708,10 @@ > m_conformanceWindow.leftOffset = 0; > > /* set pad size if width is not multiple of the minimum CU size */ > - if (p->sourceWidth & (MIN_CU_SIZE - 1)) > + if 
(p->sourceWidth & (p->minCUSize - 1)) > { > - uint32_t rem = p->sourceWidth & (MIN_CU_SIZE - 1); > - uint32_t padsize = MIN_CU_SIZE - rem; > + uint32_t rem = p->sourceWidth & (p->minCUSize - 1); > + uint32_t padsize = p->minCUSize - rem; > p->sourceWidth += padsize; > > m_conformanceWindow.bEnabled = true; > @@ -1719,10 +1719,10 @@ > } > > /* set pad size if height is not multiple of the minimum CU size */ > - if (p->sourceHeight & (MIN_CU_SIZE - 1)) > + if (p->sourceHeight & (p->minCUSize - 1)) > { > - uint32_t rem = p->sourceHeight & (MIN_CU_SIZE - 1); > - uint32_t padsize = MIN_CU_SIZE - rem; > + uint32_t rem = p->sourceHeight & (p->minCUSize - 1); > + uint32_t padsize = p->minCUSize - rem; > p->sourceHeight += padsize; > > m_conformanceWindow.bEnabled = true; > diff -r 59466f1455ce -r 24e3fc60cb41 source/encoder/frameencoder.cpp > --- a/source/encoder/frameencoder.cpp Mon Feb 16 14:28:19 2015 +0530 > +++ b/source/encoder/frameencoder.cpp Mon Feb 16 14:33:08 2015 +0530 > @@ -143,6 +143,7 @@ > { > /* Geoms only vary between CTUs in the presence of picture edges */ > int maxCUSize = m_param->maxCUSize; > + int minCUSize = m_param->minCUSize; > int heightRem = m_param->sourceHeight & (maxCUSize - 1); > int widthRem = m_param->sourceWidth & (maxCUSize - 1); > int allocGeoms = 1; // body > @@ -157,7 +158,7 @@ > return false; > > // body > - CUData::calcCTUGeoms(maxCUSize, maxCUSize, maxCUSize, m_cuGeoms); > + CUData::calcCTUGeoms(maxCUSize, maxCUSize, maxCUSize, minCUSize, > m_cuGeoms); > memset(m_ctuGeomMap, 0, sizeof(uint32_t) * m_numRows * m_numCols); > if (allocGeoms == 1) > return true; > @@ -166,7 +167,7 @@ > if (widthRem) > { > // right > - CUData::calcCTUGeoms(widthRem, maxCUSize, maxCUSize, m_cuGeoms + > countGeoms * CUGeom::MAX_GEOMS); > + CUData::calcCTUGeoms(widthRem, maxCUSize, maxCUSize, minCUSize, > m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS); > for (uint32_t i = 0; i < m_numRows; i++) > { > uint32_t ctuAddr = m_numCols * (i + 1) - 1; > @@ -177,7 
+178,7 @@ > if (heightRem) > { > // bottom > - CUData::calcCTUGeoms(maxCUSize, heightRem, maxCUSize, m_cuGeoms + > countGeoms * CUGeom::MAX_GEOMS); > + CUData::calcCTUGeoms(maxCUSize, heightRem, maxCUSize, minCUSize, > m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS); > for (uint32_t i = 0; i < m_numCols; i++) > { > uint32_t ctuAddr = m_numCols * (m_numRows - 1) + i; > @@ -188,7 +189,7 @@ > if (widthRem) > { > // corner > - CUData::calcCTUGeoms(widthRem, heightRem, maxCUSize, m_cuGeoms + > countGeoms * CUGeom::MAX_GEOMS); > + CUData::calcCTUGeoms(widthRem, heightRem, maxCUSize, minCUSize, > m_cuGeoms + countGeoms * CUGeom::MAX_GEOMS); > > uint32_t ctuAddr = m_numCols * m_numRows - 1; > m_ctuGeomMap[ctuAddr] = countGeoms * CUGeom::MAX_GEOMS; > @@ -1038,7 +1039,7 @@ > else if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N) > { > /* TODO: log intra modes at absPartIdx +0 to +3 */ > - X265_CHECK(depth == g_maxCUDepth, "Intra NxN found at > improbable depth\n"); > + X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && > ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable > depth\n"); > log->cntIntraNxN++; > log->cntIntra[depth]--; > } > @@ -1086,7 +1087,7 @@ > > if (ctu.m_partSize[absPartIdx] != SIZE_2Nx2N) > { > - X265_CHECK(depth == g_maxCUDepth, "Intra NxN found at > improbable depth\n"); > + X265_CHECK(ctu.m_log2CUSize[absPartIdx] == 3 && > ctu.m_slice->m_sps->quadtreeTULog2MinSize < 3, "Intra NxN found at improbable > depth\n"); > log->cntIntraNxN++; > /* TODO: log intra modes at absPartIdx +0 to +3 */ > } > diff -r 59466f1455ce -r 24e3fc60cb41 source/x265.h > --- a/source/x265.h Mon Feb 16 14:28:19 2015 +0530 > +++ b/source/x265.h Mon Feb 16 14:33:08 2015 +0530 > @@ -579,6 +579,10 @@ > * frame parallelism will become because of the increase in rows. > default 64 */ > uint32_t maxCUSize; > > + /* Miniumum CU width and height in pixels. The size must be 64, 32, 16, > or 8. 
> + * default 8 */ > + uint32_t minCUSize; > + > /* Enable rectangular motion prediction partitions (vertical and > * horizontal), available at all CU depths from 64x64 to 8x8. Default is > * disabled */ > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel -- Steve Borho _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel