sc/source/filter/inc/sheetdatabuffer.hxx | 6 - sc/source/filter/oox/sheetdatabuffer.cxx | 153 +++++++++++++++++-------------- 2 files changed, 90 insertions(+), 69 deletions(-)
New commits: commit deac5c84732c3491a0ef5bf7f8c1552e6def4fc0 Author: Noel Grandin <n...@peralex.com> AuthorDate: Fri Apr 30 18:24:12 2021 +0200 Commit: Noel Grandin <noel.gran...@collabora.co.uk> CommitDate: Fri Apr 30 21:39:51 2021 +0200 tdf#79049 speed up OOXML workbook load (2) The allocation of memory and pointer chasing were slowing things down in the styles conversion. So switch to a more cache-dense data structure, and re-arrange the loops to be more cache friendly. This takes the time from 1m9 to 40s for me. Change-Id: I876580adc7823313b0cdb067f2f6b3e61c39ecf8 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/114941 Tested-by: Jenkins Reviewed-by: Noel Grandin <noel.gran...@collabora.co.uk> diff --git a/sc/source/filter/inc/sheetdatabuffer.hxx b/sc/source/filter/inc/sheetdatabuffer.hxx index c49e33280a10..17add16e6234 100644 --- a/sc/source/filter/inc/sheetdatabuffer.hxx +++ b/sc/source/filter/inc/sheetdatabuffer.hxx @@ -22,6 +22,7 @@ #include <vector> #include <map> #include <set> +#include <o3tl/sorted_vector.hxx> #include "richstring.hxx" #include "worksheethelper.hxx" @@ -165,7 +166,8 @@ private: /** Writes all cell formatting attributes to the passed cell range list. (depreciates writeXfIdRangeProperties) */ void applyCellMerging( const ScRange& rRange ); - void addColXfStyle( sal_Int32 nXfId, sal_Int32 nFormatId, const ScRange& rAddress, bool bProcessRowRange = false ); + void addColXfStyles(); + void addColXfStyleProcessRowRanges(); private: /** Stores cell range address and formula token array of an array formula. */ typedef std::pair< ScRange, ApiTokenSequence > ArrayFormula; @@ -200,7 +202,7 @@ private: return lhs.mnEndRow<rhs.mnStartRow; } }; - typedef ::std::set< RowRangeStyle, StyleRowRangeComp > RowStyles; + typedef ::o3tl::sorted_vector< RowRangeStyle, StyleRowRangeComp > RowStyles; typedef ::std::map< sal_Int32, RowStyles > ColStyles; /** Stores information about a merged cell range. 
*/ struct MergedRange diff --git a/sc/source/filter/oox/sheetdatabuffer.cxx b/sc/source/filter/oox/sheetdatabuffer.cxx index de1d2c76f3c9..c9c688c2ca17 100644 --- a/sc/source/filter/oox/sheetdatabuffer.cxx +++ b/sc/source/filter/oox/sheetdatabuffer.cxx @@ -346,57 +346,99 @@ static void addIfNotInMyMap( const StylesBuffer& rStyles, std::map< FormatKeyPai rMap[ FormatKeyPair( nXfId, nFormatId ) ] = rRangeList; } -void SheetDataBuffer::addColXfStyle( sal_Int32 nXfId, sal_Int32 nFormatId, const ScRange& rAddress, bool bProcessRowRange ) -{ - RowRangeStyle aStyleRows; - aStyleRows.mnNumFmt.first = nXfId; - aStyleRows.mnNumFmt.second = nFormatId; - aStyleRows.mnStartRow = rAddress.aStart.Row(); - aStyleRows.mnEndRow = rAddress.aEnd.Row(); - for ( sal_Int32 nCol = rAddress.aStart.Col(); nCol <= rAddress.aEnd.Col(); ++nCol ) +void SheetDataBuffer::addColXfStyles() +{ + std::map< FormatKeyPair, ScRangeList > rangeStyleListMap; + for( const auto& [rFormatKeyPair, rRangeList] : maXfIdRangeLists ) + { + addIfNotInMyMap( getStyles(), rangeStyleListMap, rFormatKeyPair.first, rFormatKeyPair.second, rRangeList ); + } + // gather all ranges that have the same style and apply them in bulk + for ( const auto& [rFormatKeyPair, rRanges] : rangeStyleListMap ) { - if ( !bProcessRowRange ) - maStylesPerColumn[ nCol ].insert( aStyleRows ); - else + for (const ScRange & rAddress : rRanges) { - RowStyles& rRowStyles = maStylesPerColumn[ nCol ]; - // Reset row range for each column + RowRangeStyle aStyleRows; + aStyleRows.mnNumFmt.first = rFormatKeyPair.first; + aStyleRows.mnNumFmt.second = rFormatKeyPair.second; aStyleRows.mnStartRow = rAddress.aStart.Row(); aStyleRows.mnEndRow = rAddress.aEnd.Row(); + for ( sal_Int32 nCol = rAddress.aStart.Col(); nCol <= rAddress.aEnd.Col(); ++nCol ) + maStylesPerColumn[ nCol ].insert( aStyleRows ); + } + } +} - // If aStyleRows includes rows already allocated to a style - // in rRowStyles, then we need to split it into parts. 
- // ( to occupy only rows that have no style definition) - - // Start iterating at the first element that is not completely before aStyleRows - RowStyles::iterator rows_it = rRowStyles.lower_bound(aStyleRows); - RowStyles::iterator rows_end = rRowStyles.end(); - bool bAddRange = true; - for ( ; rows_it != rows_end; ++rows_it ) +void SheetDataBuffer::addColXfStyleProcessRowRanges() +{ + // count the number of row-range-styles we have + AddressConverter& rAddrConv = getAddressConverter(); + int cnt = 0; + for ( const auto& [nXfId, rRowRangeList] : maXfIdRowRangeList ) + { + if ( nXfId == -1 ) // it's a dud skip it + continue; + cnt += rRowRangeList.size(); + } + // pre-allocate space in the sorted_vector + for ( sal_Int32 nCol = 0; nCol <= rAddrConv.getMaxApiAddress().Col(); ++nCol ) + { + RowStyles& rRowStyles = maStylesPerColumn[ nCol ]; + rRowStyles.reserve(rRowStyles.size() + cnt); + } + const auto nMaxCol = rAddrConv.getMaxApiAddress().Col(); + for ( sal_Int32 nCol = 0; nCol <= nMaxCol; ++nCol ) + { + RowStyles& rRowStyles = maStylesPerColumn[ nCol ]; + for ( const auto& [nXfId, rRowRangeList] : maXfIdRowRangeList ) + { + if ( nXfId == -1 ) // it's a dud skip it + continue; + // get all row ranges for id + for ( const auto& rRange : rRowRangeList ) { - const RowRangeStyle& r = *rows_it; - - // Add the part of aStyleRows that does not overlap with r - if ( aStyleRows.mnStartRow < r.mnStartRow ) + RowRangeStyle aStyleRows; + aStyleRows.mnNumFmt.first = nXfId; + aStyleRows.mnNumFmt.second = -1; + aStyleRows.mnStartRow = rRange.mnFirst; + aStyleRows.mnEndRow = rRange.mnLast; + + // Reset row range for each column + aStyleRows.mnStartRow = rRange.mnFirst; + aStyleRows.mnEndRow = rRange.mnLast; + + // If aStyleRows includes rows already allocated to a style + // in rRowStyles, then we need to split it into parts. 
+ // ( to occupy only rows that have no style definition) + + // Start iterating at the first element that is not completely before aStyleRows + RowStyles::const_iterator rows_it = rRowStyles.lower_bound(aStyleRows); + bool bAddRange = true; + for ( ; rows_it != rRowStyles.end(); ++rows_it ) { - RowRangeStyle aSplit = aStyleRows; - aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, r.mnStartRow - 1); - // Insert with hint that aSplit comes directly before the current position - rRowStyles.insert( rows_it, aSplit ); + const RowRangeStyle& r = *rows_it; + + // Add the part of aStyleRows that does not overlap with r + if ( aStyleRows.mnStartRow < r.mnStartRow ) + { + RowRangeStyle aSplit = aStyleRows; + aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, r.mnStartRow - 1); + rows_it = rRowStyles.insert( aSplit ).first; + } + + // Done if no part of aStyleRows extends beyond r + if ( aStyleRows.mnEndRow <= r.mnEndRow ) + { + bAddRange = false; + break; + } + + // Cut off the part aStyleRows that was handled above + aStyleRows.mnStartRow = r.mnEndRow + 1; } - - // Done if no part of aStyleRows extends beyond r - if ( aStyleRows.mnEndRow <= r.mnEndRow ) - { - bAddRange = false; - break; - } - - // Cut off the part aStyleRows that was handled above - aStyleRows.mnStartRow = r.mnEndRow + 1; + if ( bAddRange ) + rRowStyles.insert( aStyleRows ); } - if ( bAddRange ) - rRowStyles.insert( aStyleRows ); } } } @@ -414,32 +456,9 @@ void SheetDataBuffer::finalizeImport() // write default formatting of remaining row range maXfIdRowRangeList[ maXfIdRowRange.mnXfId ].push_back( maXfIdRowRange.maRowRange ); - std::map< FormatKeyPair, ScRangeList > rangeStyleListMap; - for( const auto& [rFormatKeyPair, rRangeList] : maXfIdRangeLists ) - { - addIfNotInMyMap( getStyles(), rangeStyleListMap, rFormatKeyPair.first, rFormatKeyPair.second, rRangeList ); - } - // gather all ranges that have the same style and apply them in bulk - for ( const auto& [rFormatKeyPair, rRanges] : rangeStyleListMap ) - { - 
for (size_t i = 0, nSize = rRanges.size(); i < nSize; ++i) - addColXfStyle( rFormatKeyPair.first, rFormatKeyPair.second, rRanges[i]); - } - - for ( const auto& [rXfId, rRowRangeList] : maXfIdRowRangeList ) - { - if ( rXfId == -1 ) // it's a dud skip it - continue; - AddressConverter& rAddrConv = getAddressConverter(); - // get all row ranges for id - for ( const auto& rRange : rRowRangeList ) - { - ScRange aRange( 0, rRange.mnFirst, getSheetIndex(), - rAddrConv.getMaxApiAddress().Col(), rRange.mnLast, getSheetIndex() ); + addColXfStyles(); - addColXfStyle( rXfId, -1, aRange, true ); - } - } + addColXfStyleProcessRowRanges(); ScDocumentImport& rDocImport = getDocImport(); ScDocument& rDoc = rDocImport.getDoc(); _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits