As of now, the offset field can be removed from the CU structure. We don't have any planned optimizations that depend on it. If needed, we can add it back later.
On Mon, Sep 29, 2014 at 4:11 PM, Deepthi Nandakumar < deep...@multicorewareinc.com> wrote: > Ashok/Santhoshini - pls review. Does removing offsets affect any planned > optimizations? > > On Sat, Sep 27, 2014 at 7:03 AM, <dtyx...@gmail.com> wrote: > >> # HG changeset patch >> # User David T Yuen <dtyx...@gmail.com> >> # Date 1411781537 25200 >> # Node ID 85098db291ae133981419868685358227b8b1437 >> # Parent 4b18a27b52ac69a16805c2b455d4f891cdd4a057 >> Changes for loadCTUData >> >> Replaced getDepthScanIdx() with table g_depthScanIdx >> Moved Analysis::loadCTUData to TComDataCU::loadCTUData since it only >> works with TComDataCU fields >> Replaced CU.offsets[2] with local variables in loadCTUData since that is >> the only place it was set and used >> minor changes to reduce the number of local variables in loadCTUData >> >> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComDataCU.cpp >> --- a/source/Lib/TLibCommon/TComDataCU.cpp Fri Sep 26 10:48:07 2014 >> +0530 >> +++ b/source/Lib/TLibCommon/TComDataCU.cpp Fri Sep 26 18:32:17 2014 >> -0700 >> @@ -2407,4 +2407,43 @@ >> result.firstSignificanceMapContext = bIsLuma ? 
21 : 12; >> } >> >> +void TComDataCU::loadCTUData(uint32_t maxCUSize) >> +{ >> + // Initialize the coding blocks inside the CTB >> + for (uint32_t log2CUSize = g_log2Size[maxCUSize], rangeCUIdx = 0; >> log2CUSize >= MIN_LOG2_CU_SIZE; log2CUSize--) >> + { >> + uint32_t blockSize = 1 << log2CUSize; >> + uint32_t sbWidth = 1 << (g_log2Size[maxCUSize] - log2CUSize); >> + int32_t last_level_flag = log2CUSize == MIN_LOG2_CU_SIZE; >> + for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++) >> + { >> + for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++) >> + { >> + uint32_t depth_idx = g_depthScanIdx[sb_y][sb_x]; >> + uint32_t cuIdx = rangeCUIdx + depth_idx; >> + uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth + >> (depth_idx << 2); >> + uint32_t px = m_cuPelX + sb_x * blockSize; >> + uint32_t py = m_cuPelY + sb_y * blockSize; >> + int32_t present_flag = px < >> m_pic->m_origPicYuv->m_picWidth && py < m_pic->m_origPicYuv->m_picHeight; >> + int32_t split_mandatory_flag = present_flag && >> !last_level_flag && (px + blockSize > m_pic->m_origPicYuv->m_picWidth || py >> + blockSize > m_pic->m_origPicYuv->m_picHeight); >> + >> + /* Offset of the luma CU in the X, Y direction in terms >> of pixels from the CTU origin */ >> + uint32_t xOffset = (sb_x * blockSize) >> 3; >> + uint32_t yOffset = (sb_y * blockSize) >> 3; >> + >> + CU *cu = m_CULocalData + cuIdx; >> + cu->log2CUSize = log2CUSize; >> + cu->childIdx = child_idx; >> + cu->encodeIdx = g_depthScanIdx[yOffset][xOffset]; >> + cu->flags = 0; >> + >> + CU_SET_FLAG(cu->flags, CU::PRESENT, present_flag); >> + CU_SET_FLAG(cu->flags, CU::SPLIT_MANDATORY | CU::SPLIT, >> split_mandatory_flag); >> + CU_SET_FLAG(cu->flags, CU::LEAF, last_level_flag); >> + } >> + } >> + rangeCUIdx += sbWidth * sbWidth; >> + } >> +} >> + >> //! 
\} >> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComDataCU.h >> --- a/source/Lib/TLibCommon/TComDataCU.h Fri Sep 26 10:48:07 2014 >> +0530 >> +++ b/source/Lib/TLibCommon/TComDataCU.h Fri Sep 26 18:32:17 2014 >> -0700 >> @@ -114,7 +114,6 @@ >> uint32_t log2CUSize; // Log of the CU size. >> uint32_t childIdx; // Index of the first child CU >> uint32_t encodeIdx; // Encoding index of this CU in terms of 8x8 >> blocks. >> - uint32_t offset[2]; // Offset of the luma CU in the X, Y direction >> in terms of pixels from the CTU origin >> uint32_t flags; // CU flags. >> }; >> >> @@ -274,6 +273,7 @@ >> void initCU(Frame* pic, uint32_t cuAddr); >> void initEstData(); >> void initSubCU(TComDataCU* cu, uint32_t partUnitIdx, >> uint32_t depth, int qp); >> + void loadCTUData(uint32_t maxCUSize); >> >> void copyToSubCU(TComDataCU* lcu, uint32_t partUnitIdx, >> uint32_t depth); >> void copyPartFrom(TComDataCU* cu, uint32_t partUnitIdx, >> uint32_t depth, bool isRDObasedAnalysis = true); >> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComRom.cpp >> --- a/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 10:48:07 2014 +0530 >> +++ b/source/Lib/TLibCommon/TComRom.cpp Fri Sep 26 18:32:17 2014 -0700 >> @@ -517,5 +517,18 @@ >> {256, 64, 16, 4} >> }; >> >> +/* g_depthScanIdx [y][x] */ >> +const uint32_t g_depthScanIdx[8][8] = >> +{ >> + { 0, 1, 4, 5, 16, 17, 20, 21, }, >> + { 2, 3, 6, 7, 18, 19, 22, 23, }, >> + { 8, 9, 12, 13, 24, 25, 28, 29, }, >> + { 10, 11, 14, 15, 26, 27, 30, 31, }, >> + { 32, 33, 36, 37, 48, 49, 52, 53, }, >> + { 34, 35, 38, 39, 50, 51, 54, 55, }, >> + { 40, 41, 44, 45, 56, 57, 60, 61, }, >> + { 42, 43, 46, 47, 58, 59, 62, 63, } >> +}; >> + >> } >> //! 
\} >> diff -r 4b18a27b52ac -r 85098db291ae source/Lib/TLibCommon/TComRom.h >> --- a/source/Lib/TLibCommon/TComRom.h Fri Sep 26 10:48:07 2014 +0530 >> +++ b/source/Lib/TLibCommon/TComRom.h Fri Sep 26 18:32:17 2014 -0700 >> @@ -159,6 +159,8 @@ >> >> extern const uint32_t g_depthInc[3][4]; >> >> +extern const uint32_t g_depthScanIdx[8][8]; >> + >> } >> >> #endif //ifndef X265_TCOMROM_H >> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/analysis.cpp >> --- a/source/encoder/analysis.cpp Fri Sep 26 10:48:07 2014 +0530 >> +++ b/source/encoder/analysis.cpp Fri Sep 26 18:32:17 2014 -0700 >> @@ -30,32 +30,6 @@ >> >> using namespace x265; >> >> -namespace { >> -// TO DO: Remove this function with a table. >> -int getDepthScanIdx(int x, int y, int size) >> -{ >> - if (size == 1) >> - return 0; >> - >> - int depth = 0; >> - int h = size >> 1; >> - >> - if (x >= h) >> - { >> - x -= h; >> - depth += h * h; >> - } >> - >> - if (y >= h) >> - { >> - y -= h; >> - depth += 2 * h * h; >> - } >> - >> - return depth + getDepthScanIdx(x, y, h); >> -} >> -} >> - >> Analysis::Analysis() >> { >> m_bestPredYuv = NULL; >> @@ -253,47 +227,6 @@ >> delete [] m_origYuv; >> } >> >> -void Analysis::loadCTUData(TComDataCU* parentCU) >> -{ >> - uint8_t cuRange[2]= {MIN_LOG2_CU_SIZE, >> g_log2Size[m_param->maxCUSize]}; >> - >> - // Initialize the coding blocks inside the CTB >> - for (int rangeIdx = cuRange[1], rangeCUIdx = 0; rangeIdx >= >> cuRange[0]; rangeIdx--) >> - { >> - uint32_t log2CUSize = rangeIdx; >> - int32_t blockSize = 1 << log2CUSize; >> - uint32_t b8Width = 1 << (cuRange[1] - 3); >> - uint32_t sbWidth = 1 << (cuRange[1] - rangeIdx); >> - int32_t last_level_flag = rangeIdx == cuRange[0]; >> - for (uint32_t sb_y = 0; sb_y < sbWidth; sb_y++) >> - { >> - for (uint32_t sb_x = 0; sb_x < sbWidth; sb_x++) >> - { >> - uint32_t depth_idx = getDepthScanIdx(sb_x, sb_y, >> sbWidth); >> - uint32_t cuIdx = rangeCUIdx + depth_idx; >> - uint32_t child_idx = rangeCUIdx + sbWidth * sbWidth + >> 
(depth_idx << 2); >> - int32_t px = parentCU->getCUPelX() + sb_x * blockSize; >> - int32_t py = parentCU->getCUPelY() + sb_y * blockSize; >> - int32_t present_flag = px < >> parentCU->m_pic->m_origPicYuv->m_picWidth && py < >> parentCU->m_pic->m_origPicYuv->m_picHeight; >> - int32_t split_mandatory_flag = present_flag && >> !last_level_flag && (px + blockSize > >> parentCU->m_pic->m_origPicYuv->m_picWidth || py + blockSize > >> parentCU->m_pic->m_origPicYuv->m_picHeight); >> - >> - CU *cu = parentCU->m_CULocalData + cuIdx; >> - cu->log2CUSize = log2CUSize; >> - cu->childIdx = child_idx; >> - cu->offset[0] = sb_x * blockSize; >> - cu->offset[1] = sb_y * blockSize; >> - cu->encodeIdx = getDepthScanIdx(cu->offset[0] >> 3, >> cu->offset[1] >> 3, b8Width); >> - cu->flags = 0; >> - >> - CU_SET_FLAG(cu->flags, CU::PRESENT, present_flag); >> - CU_SET_FLAG(cu->flags, CU::SPLIT_MANDATORY | CU::SPLIT, >> split_mandatory_flag); >> - CU_SET_FLAG(cu->flags, CU::LEAF, last_level_flag); >> - } >> - } >> - rangeCUIdx += sbWidth * sbWidth; >> - } >> -} >> - >> void Analysis::compressCU(TComDataCU* cu) >> { >> Frame* pic = cu->m_pic; >> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/analysis.h >> --- a/source/encoder/analysis.h Fri Sep 26 10:48:07 2014 +0530 >> +++ b/source/encoder/analysis.h Fri Sep 26 18:32:17 2014 -0700 >> @@ -104,7 +104,6 @@ >> bool create(uint32_t totalDepth, uint32_t maxWidth); >> void destroy(); >> void compressCU(TComDataCU* cu); >> - void loadCTUData(TComDataCU* cu); >> >> protected: >> >> diff -r 4b18a27b52ac -r 85098db291ae source/encoder/frameencoder.cpp >> --- a/source/encoder/frameencoder.cpp Fri Sep 26 10:48:07 2014 +0530 >> +++ b/source/encoder/frameencoder.cpp Fri Sep 26 18:32:17 2014 -0700 >> @@ -686,7 +686,7 @@ >> // load current best state from go-on entropy coder >> curRow.rdEntropyCoders[0][CI_CURR_BEST].load(rowCoder); >> >> - tld.analysis.loadCTUData(cu); >> + cu->loadCTUData(m_param->maxCUSize); >> tld.analysis.m_quant.setQPforQuant(cu); 
>> tld.analysis.compressCU(cu); // Does all the CU analysis >> >> _______________________________________________ >> x265-devel mailing list >> x265-devel@videolan.org >> https://mailman.videolan.org/listinfo/x265-devel >> > > > _______________________________________________ > x265-devel mailing list > x265-devel@videolan.org > https://mailman.videolan.org/listinfo/x265-devel > >
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel