If we split these common data into one class, we have to pass pointer of FrameFilter to ParallelFilter, it made slower access in process (see below), so I make a copy in local class member. old: class -> offset -> numRows new: pointer to FrameFilter -> class -> offset -> numRows
At 2016-01-14 16:01:19,"Deepthi Nandakumar" <deep...@multicorewareinc.com> wrote: Min, Sure - what I mean is, can we have a class that looks like: class FrameFilter { public: x265_param* m_param; Frame* m_frame; FrameEncoder* m_frameEncoder; FrameData* m_encData; int m_hChromaShift; int m_vChromaShift; int m_pad[2]; uint32_t m_numRows; uint32_t m_numCols; uint32_t m_lastWidth; ................. .................. } class ParallelFilter { public: uint32_t m_rowHeight; uint32_t m_row; uint32_t m_rowAddr; ParallelFilter* m_prevRow; SAO m_sao; ThreadSafeInteger m_lastCol; /* The column that next to process */ ThreadSafeInteger m_allowedCol; /* The column that processed from Encode pipeline */ ThreadSafeInteger m_lastDeblocked; /* The column that finished all of Deblock stages */ } The common variables are in FrameFilter, and ParallelFilter contains only the row specific data. On Wed, Jan 13, 2016 at 12:15 AM, chen <chenm...@163.com> wrote: I fixed the crash and modify/remove FrameFilter::processPostCu, I spending ~64 bytes every row. In my design, FrameFilter just one instance every frame and ParallelFilter instance for every row. At 2016-01-12 13:37:36,"Deepthi Nandakumar" <deep...@multicorewareinc.com> wrote: Min, thanks. Queuing this. I still think the ParallelFilter class needs a refactor - not nested, but inherited from FrameFilter. And FrameFilter contains all the constants for a single frame - numCols, numRows etc. This will also clean up FrameFilter::processPostCu (which is causing the crash in no-deblock, no-sao, since now you'll only refer to FrameFilter class members and not ParallelFilter (which is null if sao and deblock are disabled). On Tue, Jan 12, 2016 at 2:36 AM, Min Chen <chenm...@163.com> wrote: # HG changeset patch # User Min Chen <chenm...@163.com> # Date 1452545402 21600 # Node ID 1c273c83a4478ed39b0e6734eec1ba1cfccd07d6 # Parent 19cfada7162147f293e37302e4c7f9c1760928a0 fix deadlock and output change on new ParallelFilter framework. (Issue #225) The bug from two part: 1. we old sync system allow latest column execute parallelism 2. new ParallelFilter logic will distribute threads and waitting the status change, there have a race conditions --- source/encoder/frameencoder.cpp | 108 +++++++++++++++++++-------------------- 1 files changed, 53 insertions(+), 55 deletions(-) diff -r 19cfada71621 -r 1c273c83a447 source/encoder/frameencoder.cpp --- a/source/encoder/frameencoder.cpp Mon Jan 11 16:28:39 2016 +0530 +++ b/source/encoder/frameencoder.cpp Mon Jan 11 14:50:02 2016 -0600 @@ -962,6 +962,58 @@ // Save CABAC state for next row curRow.bufferedEntropy.loadContexts(rowCoder); + /* SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas */ + if (m_param->bEnableSAO && m_param->bSaoNonDeblocked) + m_frameFilter.m_parallelFilter[row].m_sao.calcSaoStatsCu_BeforeDblk(m_frame, col, row); + + /* Deblock with idle threading */ + if (m_param->bEnableLoopFilter | m_param->bEnableSAO) + { + // TODO: Multiple Threading + // Delay ONE row to avoid Intra Prediction Conflict + if (m_pool && (row >= 1)) + { + // Waitting last threading finish + m_frameFilter.m_parallelFilter[row - 1].waitForExit(); + + // Processing new group + int allowCol = col; + + // avoid race condition on last column + if (row >= 2) + { + allowCol = X265_MIN(((col == numCols - 1) ? m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get() + : m_frameFilter.m_parallelFilter[row - 2].m_lastCol.get()), (int)col); + } + m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(allowCol); + m_frameFilter.m_parallelFilter[row - 1].tryBondPeers(*this, 1); + } + + // Last Row may start early + if (m_pool && (row == m_numRows - 1)) + { + // Waiting for the last thread to finish + m_frameFilter.m_parallelFilter[row].waitForExit(); + + // Deblocking last row + int allowCol = col; + + // avoid race condition on last column + if (row >= 2) + { + allowCol = X265_MIN(((col == numCols - 1) ? m_frameFilter.m_parallelFilter[row - 1].m_lastDeblocked.get() + : m_frameFilter.m_parallelFilter[row - 1].m_lastCol.get()), (int)col); + } + m_frameFilter.m_parallelFilter[row].m_allowedCol.set(allowCol); + m_frameFilter.m_parallelFilter[row].tryBondPeers(*this, 1); + } + } + // Both Loopfilter and SAO Disabled + else + { + m_frameFilter.processPostCu(row, col); + } + // Completed CU processing curRow.completed++; @@ -1091,60 +1143,6 @@ } } - // TODO: move Deblock and SAO to before VBV check - - /* SAO parameter estimation using non-deblocked pixels for CTU bottom and right boundary areas */ - if (m_param->bEnableSAO && m_param->bSaoNonDeblocked) - m_frameFilter.m_parallelFilter[row].m_sao.calcSaoStatsCu_BeforeDblk(m_frame, col, row); - - /* Deblock with idle threading */ - if (m_param->bEnableLoopFilter | m_param->bEnableSAO) - { - // TODO: Multiple Threading - // Delay ONE row to avoid Intra Prediction Conflict - if (m_pool && (row >= 1)) - { - // Waitting last threading finish - m_frameFilter.m_parallelFilter[row - 1].waitForExit(); - - // Processing new group - int allowCol = col; - - // avoid race condition on last column - if (row >= 2) - { - allowCol = X265_MIN(((col == numCols - 1) ? m_frameFilter.m_parallelFilter[row - 2].m_lastDeblocked.get() - : m_frameFilter.m_parallelFilter[row - 2].m_lastCol.get()), (int)col); - } - m_frameFilter.m_parallelFilter[row - 1].m_allowedCol.set(allowCol); - m_frameFilter.m_parallelFilter[row - 1].tryBondPeers(*this, 1); - } - - // Last Row may start early - if (m_pool && (row == m_numRows - 1)) - { - // Waiting for the last thread to finish - m_frameFilter.m_parallelFilter[row].waitForExit(); - - // Deblocking last row - int allowCol = col; - - // avoid race condition on last column - if (row >= 2) - { - allowCol = X265_MIN(((col == numCols - 1) ? m_frameFilter.m_parallelFilter[row - 1].m_lastDeblocked.get() - : m_frameFilter.m_parallelFilter[row - 1].m_lastCol.get()), (int)col); - } - m_frameFilter.m_parallelFilter[row].m_allowedCol.set(allowCol); - m_frameFilter.m_parallelFilter[row].tryBondPeers(*this, 1); - } - } - // Both Loopfilter and SAO Disabled - else - { - m_frameFilter.processPostCu(row, col); - } - if (m_param->bEnableWavefront && curRow.completed >= 2 && row < m_numRows - 1 && (!m_bAllRowsStop || intRow + 1 < m_vbvResetTriggerRow)) { @@ -1161,7 +1159,7 @@ ScopedLock self(curRow.lock); if ((m_bAllRowsStop && intRow > m_vbvResetTriggerRow) || - (row > 0 && curRow.completed < numCols - 1 && m_rows[row - 1].completed < m_rows[row].completed + 2)) + (row > 0 && ((curRow.completed < numCols - 1) || (m_rows[row - 1].completed < numCols)) && m_rows[row - 1].completed < m_rows[row].completed + 2)) { curRow.active = false; curRow.busy = false; _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Deepthi Nandakumar Engineering Manager, x265 Multicoreware, Inc _______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel -- Deepthi Nandakumar Engineering Manager, x265 Multicoreware, Inc
_______________________________________________ x265-devel mailing list x265-devel@videolan.org https://mailman.videolan.org/listinfo/x265-devel