# HG changeset patch # User Steve Borho <st...@borho.org> # Date 1405145839 18000 # Sat Jul 12 01:17:19 2014 -0500 # Node ID 7163a5f40da0bd2836e435eff4c854229727a781 # Parent 6af56f7c870355152c9897a7bca9fbd8047dd5fc encoder: allocate thread local data from the context of the worker thread
This is a simple change, data-structure-wise, but it delays allocation of the ThreadLocalData structures until the worker thread that will use each structure can allocate it itself. On multi-socket systems, this will place these structures closer, cache-wise, to the thread that uses them. We probably need to flag our worker threads with core affinity to make this properly effective. diff -r 6af56f7c8703 -r 7163a5f40da0 source/encoder/encoder.cpp --- a/source/encoder/encoder.cpp Fri Jul 11 19:38:35 2014 -0500 +++ b/source/encoder/encoder.cpp Sat Jul 12 01:17:19 2014 -0500 @@ -96,14 +96,10 @@ } /* Allocate thread local data shared by all frame encoders */ - ThreadPool *pool = ThreadPool::getThreadPool(); - const int poolThreadCount = pool ? pool->getThreadCount() : 1; - m_threadLocalData = new ThreadLocalData[poolThreadCount]; + const int poolThreadCount = m_threadPool ? m_threadPool->getThreadCount() : 1; + m_threadLocalData = X265_MALLOC(ThreadLocalData*, poolThreadCount); if (m_threadLocalData) - { - for (int i = 0; i < poolThreadCount; i++) - m_threadLocalData[i].init(*this); - } + memset(m_threadLocalData, 0, sizeof(ThreadLocalData*) * poolThreadCount); else m_aborted = true; @@ -164,8 +160,13 @@ delete [] m_frameEncoder; } + const int poolThreadCount = m_threadPool ? 
m_threadPool->getThreadCount() : 1; if (m_threadLocalData) - delete [] m_threadLocalData; + { + for (int i = 0; i < poolThreadCount; i++) + delete m_threadLocalData[i]; + X265_FREE(m_threadLocalData); + } if (m_lookahead) { @@ -623,7 +624,9 @@ { for (int i = 0; i < poolThreadCount; i++) { - StatisticLog& enclog = m_threadLocalData[i].m_cuCoder.m_sliceTypeLog[sliceType]; + if (!m_threadLocalData[i]) + continue; + StatisticLog& enclog = m_threadLocalData[i]->m_cuCoder.m_sliceTypeLog[sliceType]; if (depth == 0) finalLog.totalCu += enclog.totalCu; finalLog.cntIntra[depth] += enclog.cntIntra[depth]; diff -r 6af56f7c8703 -r 7163a5f40da0 source/encoder/encoder.h --- a/source/encoder/encoder.h Fri Jul 11 19:38:35 2014 -0500 +++ b/source/encoder/encoder.h Sat Jul 12 01:17:19 2014 -0500 @@ -71,7 +71,6 @@ { private: - bool m_aborted; // fatal error detected int m_pocLast; ///< time index (POC) int m_encodedFrameNum; int m_outputCount; @@ -166,7 +165,8 @@ x265_param* m_param; RateControl* m_rateControl; - ThreadLocalData* m_threadLocalData; + ThreadLocalData** m_threadLocalData; + bool m_aborted; // fatal error detected bool m_bEnableRDOQ; diff -r 6af56f7c8703 -r 7163a5f40da0 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Fri Jul 11 19:38:35 2014 -0500 +++ b/source/encoder/frameencoder.cpp Sat Jul 12 01:17:19 2014 -0500 @@ -837,13 +837,32 @@ const int realRow = row >> 1; const int typeNum = row & 1; - ThreadLocalData& tld = threadId >= 0 ? 
m_top->m_threadLocalData[threadId] : m_tld; + ThreadLocalData* tld; + if (threadId < 0) + tld = &m_tld; + else + { + if (!m_top->m_threadLocalData[threadId]) + { + m_top->m_threadLocalData[threadId] = new ThreadLocalData; + if (m_top->m_threadLocalData[threadId]) + m_top->m_threadLocalData[threadId]->init(*m_top); + else + { + x265_log(m_param, X265_LOG_ERROR, "unable to allocate thread local data, aborting\n"); + m_completionEvent.trigger(); + m_top->m_aborted = true; + return; + } + } + tld = m_top->m_threadLocalData[threadId]; + } if (!typeNum) - processRowEncoder(realRow, tld); + processRowEncoder(realRow, *tld); else { - processRowFilter(realRow, tld); + processRowFilter(realRow, *tld); // NOTE: Active next row if (realRow != m_numRows - 1) _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel