# HG changeset patch
# User Min Chen <chenm...@163.com>
# Date 1394845741 25200
# Node ID 27c40f54ac64752f5dea816535ac02b62ba9a019
# Parent ed48f84e541b2916313e067ad04696c4f8514a47
optimize: rewrite TComTrQuant::xGetICRate
diff -r ed48f84e541b -r 27c40f54ac64 source/Lib/TLibCommon/TComRom.cpp --- a/source/Lib/TLibCommon/TComRom.cpp Fri Mar 14 14:21:34 2014 +0530 +++ b/source/Lib/TLibCommon/TComRom.cpp Fri Mar 14 18:09:01 2014 -0700 @@ -437,9 +437,9 @@ const uint32_t g_groupIdx[32] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9 }; // Rice parameters for absolute transform levels -const uint32_t g_goRiceRange[5] = { 7, 14, 26, 46, 78 }; +const uint8_t g_goRiceRange[5] = { 7, 14, 26, 46, 78 }; -const uint32_t g_goRicePrefixLen[5] = { 8, 7, 6, 5, 4 }; +//const uint8_t g_goRicePrefixLen[5] = { 8, 7, 6, 5, 4 }; int g_quantTSDefault4x4[16] = { diff -r ed48f84e541b -r 27c40f54ac64 source/Lib/TLibCommon/TComRom.h --- a/source/Lib/TLibCommon/TComRom.h Fri Mar 14 14:21:34 2014 +0530 +++ b/source/Lib/TLibCommon/TComRom.h Fri Mar 14 18:09:01 2014 -0700 @@ -131,8 +131,8 @@ extern const uint32_t g_groupIdx[32]; extern const uint32_t g_minInGroup[10]; -extern const uint32_t g_goRiceRange[5]; //!< maximum value coded with Rice codes -extern const uint32_t g_goRicePrefixLen[5]; //!< prefix length for each maximum value +extern const uint8_t g_goRiceRange[5]; //!< maximum value coded with Rice codes +//extern const uint8_t g_goRicePrefixLen[5]; //!< prefix length for each maximum value // ==================================================================================================================== // Bit-depth diff -r ed48f84e541b -r 27c40f54ac64 source/Lib/TLibCommon/TComTrQuant.cpp --- a/source/Lib/TLibCommon/TComTrQuant.cpp Fri Mar 14 14:21:34 2014 +0530 +++ b/source/Lib/TLibCommon/TComTrQuant.cpp Fri Mar 14 18:09:01 2014 -0700 @@ -60,6 +60,11 @@ #define RDOQ_CHROMA 1 ///< use of RDOQ in chroma +inline static int x265_min_fast(int x, int y) +{ + return y + ((x - y) & ((x - y) >> (sizeof(int) * CHAR_BIT - 1))); // min(x, y) +} + // 
==================================================================================================================== // TComTrQuant class member functions // ==================================================================================================================== @@ -568,7 +573,6 @@ uint32_t c1Idx = 0; uint32_t c2Idx = 0; int cgLastScanPos = -1; - int baseLevel; uint32_t cgNum = 1 << codingParameters.log2TrSizeCG * 2; int scanPos; @@ -609,6 +613,13 @@ if (lastScanPos >= 0) { + const uint32_t c1c2Idx = ((c1Idx - 8) >> (sizeof(int) * CHAR_BIT - 1)) + (((-(int)c2Idx) >> (sizeof(int) * CHAR_BIT - 1)) + 1) * 2; + const uint32_t baseLevel = ((uint32_t)0xD9 >> (c1c2Idx * 2)) & 3; // {1, 2, 1, 3} + assert(C2FLAG_NUMBER == 1); + assert(!!(c1Idx < C1FLAG_NUMBER) == ((c1Idx - 8) >> (sizeof(int) * CHAR_BIT - 1))); + assert(!!(c2Idx == 0) == ((-(int)c2Idx) >> (sizeof(int) * CHAR_BIT - 1)) + 1); + assert(baseLevel == ((c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx == 0)) : 1)); + rateIncUp[blkPos] = 0; rateIncDown[blkPos] = 0; deltaU[blkPos] = 0; @@ -636,9 +647,9 @@ deltaU[blkPos] = (levelDouble - ((int)level << qbits)) >> (qbits - 8); if (level > 0) { - int rateNow = xGetICRate(level, oneCtx, absCtx, goRiceParam, c1Idx, c2Idx); - rateIncUp[blkPos] = xGetICRate(level + 1, oneCtx, absCtx, goRiceParam, c1Idx, c2Idx) - rateNow; - rateIncDown[blkPos] = xGetICRate(level - 1, oneCtx, absCtx, goRiceParam, c1Idx, c2Idx) - rateNow; + int rateNow = xGetICRate(level, level - baseLevel, oneCtx, absCtx, goRiceParam, c1c2Idx); + rateIncUp[blkPos] = xGetICRate(level + 1, level + 1 - baseLevel, oneCtx, absCtx, goRiceParam, c1c2Idx) - rateNow; + rateIncDown[blkPos] = xGetICRate(level - 1, level - 1 - baseLevel, oneCtx, absCtx, goRiceParam, c1c2Idx) - rateNow; } else // level == 0 { @@ -647,7 +658,6 @@ dstCoeff[blkPos] = level; baseCost += costCoeff[scanPos]; - baseLevel = (c1Idx < C1FLAG_NUMBER) ? 
(2 + (c2Idx < C2FLAG_NUMBER)) : 1; if (level >= baseLevel) { if (goRiceParam < 4 && level > (3 << goRiceParam)) @@ -1229,65 +1239,75 @@ } inline int TComTrQuant::xGetICRate(uint32_t absLevel, - uint16_t ctxNumOne, - uint16_t ctxNumAbs, - uint16_t absGoRice, - uint32_t c1Idx, - uint32_t c2Idx) const + int32_t diffLevel, + uint32_t ctxNumOne, + uint32_t ctxNumAbs, + uint32_t absGoRice, + uint32_t c1c2Idx) const { + assert(c1c2Idx <= 3); + assert(absGoRice <= 4); + if (absLevel == 0) + { + assert(diffLevel < 0); + return 0; + } int rate = 0; - uint32_t baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1; + const int *greaterOneBits = m_estBitsSbac->greaterOneBits[ctxNumOne]; + const int *levelAbsBits = m_estBitsSbac->levelAbsBits[ctxNumAbs]; - if (absLevel >= baseLevel) + if (diffLevel < 0) { - uint32_t symbol = absLevel - baseLevel; - uint32_t maxVlc = g_goRiceRange[absGoRice]; + assert(absLevel >= 0 && absLevel <= 2); + rate += greaterOneBits[(absLevel == 2)]; + + if (absLevel == 2) + { + rate += levelAbsBits[0]; + } + } + else + { + uint32_t symbol = diffLevel; + const uint32_t maxVlc = g_goRiceRange[absGoRice]; bool expGolomb = (symbol > maxVlc); if (expGolomb) { absLevel = symbol - maxVlc; - int egs = 1; - for (uint32_t max = 2; absLevel >= max; max <<= 1, egs += 2) - { - } + + // NOTE: mapping to x86 hardware instruction BSR + unsigned long size; + CLZ32(size, absLevel); + int egs = size * 2 + 1; + //int egs = 1; + //for (uint32_t max = 2; absLevel >= max; max <<= 1, egs += 2) + //{ + //} + //assert(egs == size * 2 + 1); rate += egs << 15; - symbol = std::min<uint32_t>(symbol, (maxVlc + 1)); + + // NOTE: in here, expGolomb=true means (symbol >= maxVlc + 1) + assert(x265_min_fast(symbol, (maxVlc + 1)) == maxVlc + 1); + symbol = maxVlc + 1; } - uint16_t prefLen = uint16_t(symbol >> absGoRice) + 1; - uint16_t numBins = std::min<uint32_t>(prefLen, g_goRicePrefixLen[absGoRice]) + absGoRice; + uint32_t prefLen = (symbol >> absGoRice) + 1; + 
uint32_t numBins = x265_min_fast(prefLen + absGoRice, 8/*g_goRicePrefixLen[absGoRice] + absGoRice*/); rate += numBins << 15; - if (c1Idx < C1FLAG_NUMBER) + if (c1c2Idx & 1) { - rate += m_estBitsSbac->greaterOneBits[ctxNumOne][1]; + rate += greaterOneBits[1]; + } - if (c2Idx < C2FLAG_NUMBER) - { - rate += m_estBitsSbac->levelAbsBits[ctxNumAbs][1]; - } + if (c1c2Idx == 3) + { + rate += levelAbsBits[1]; } } - else if (absLevel == 0) - { - return 0; - } - else if (absLevel == 1) - { - rate += m_estBitsSbac->greaterOneBits[ctxNumOne][0]; - } - else if (absLevel == 2) - { - rate += m_estBitsSbac->greaterOneBits[ctxNumOne][1]; - rate += m_estBitsSbac->levelAbsBits[ctxNumAbs][0]; - } - else - { - assert(0); - } return rate; } diff -r ed48f84e541b -r 27c40f54ac64 source/Lib/TLibCommon/TComTrQuant.h --- a/source/Lib/TLibCommon/TComTrQuant.h Fri Mar 14 14:21:34 2014 +0530 +++ b/source/Lib/TLibCommon/TComTrQuant.h Fri Mar 14 18:09:01 2014 -0700 @@ -199,7 +199,7 @@ inline double xGetICRateCost(uint32_t absLevel, uint16_t ctxNumOne, uint16_t ctxNumAbs, uint16_t absGoRice, uint32_t c1Idx, uint32_t c2Idx) const; - inline int xGetICRate(uint32_t absLevel, uint16_t ctxNumOne, uint16_t ctxNumAbs, uint16_t absGoRice, uint32_t c1Idx, uint32_t c2Idx) const; + inline int xGetICRate(uint32_t absLevel, int32_t diffLevel, uint32_t ctxNumOne, uint32_t ctxNumAbs, uint32_t absGoRice, uint32_t c1c2Idx) const; inline double xGetRateLast(uint32_t posx, uint32_t posy) const; diff -r ed48f84e541b -r 27c40f54ac64 source/common/threading.h --- a/source/common/threading.h Fri Mar 14 14:21:34 2014 +0530 +++ b/source/common/threading.h Fri Mar 14 18:09:01 2014 -0700 @@ -48,6 +48,7 @@ #include <sys/time.h> #include <unistd.h> +#define CLZ32(id, x) id = (unsigned long)__builtin_clz(x) ^ 31 #define CTZ64(id, x) id = (unsigned long)__builtin_ctzll(x) #define ATOMIC_OR(ptr, mask) __sync_or_and_fetch(ptr, mask) #define ATOMIC_CAS(ptr, oldval, newval) __sync_val_compare_and_swap(ptr, oldval, newval) @@ -121,6 
+122,7 @@ #define ATOMIC_OR(ptr, mask) InterlockedOr64((volatile LONG64*)ptr, mask) #endif // if _WIN32_WINNT <= _WIN32_WINNT_WINXP +#define CLZ32(id, x) _BitScanReverse(&id, x) #define CTZ64(id, x) _BitScanForward64(&id, x) #define ATOMIC_CAS(ptr, oldval, newval) (uint64_t)_InterlockedCompareExchange64((volatile LONG64*)ptr, newval, oldval) #define ATOMIC_CAS32(ptr, oldval, newval) (uint64_t)_InterlockedCompareExchange((volatile LONG*)ptr, newval, oldval) _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel