sc/source/filter/inc/sheetdatabuffer.hxx | 2 sc/source/filter/oox/sheetdatabuffer.cxx | 125 ++++++++++++++++--------------- sc/source/filter/oox/stylesbuffer.cxx | 4 3 files changed, 69 insertions(+), 62 deletions(-)
New commits: commit 0d62ab61e982a0967386e40f21829fc95870b969 Author: Noel Grandin <[email protected]> AuthorDate: Fri May 9 19:51:52 2025 +0200 Commit: Noel Grandin <[email protected]> CommitDate: Sat May 10 08:37:31 2025 +0200 tdf#163015 FILEOPEN: Opening specific XLSX file takes long time we spend most of our time inserting into the output sorted_vector, which ends up needing a lot of data movement when the output vector becomes large. Re-arrange the data structures so we can do this in one pass, and instead of inserting into the output vector, we can just append to it. Change-Id: If729e8ca1f2e6b5cf546c17331f1f3716674f2cb Reviewed-on: https://gerrit.libreoffice.org/c/core/+/185099 Reviewed-by: Noel Grandin <[email protected]> Tested-by: Jenkins diff --git a/sc/source/filter/inc/sheetdatabuffer.hxx b/sc/source/filter/inc/sheetdatabuffer.hxx index 538631330082..cf3dd2eeed7b 100644 --- a/sc/source/filter/inc/sheetdatabuffer.hxx +++ b/sc/source/filter/inc/sheetdatabuffer.hxx @@ -217,7 +217,7 @@ private: MergedRangeVector maMergedRanges; /// Merged cell ranges. MergedRangeVector maCenterFillRanges; /// Merged cell ranges from 'center across' or 'fill' alignment. bool mbPendingSharedFmla; /// True = maSharedFmlaAddr and maSharedBaseAddr are valid. - std::map< sal_Int32, std::vector< ValueRange > > maXfIdRowRangeList; /// Cached XF identifiers for a ranges of rows, we try and process rowranges with the same XF id together + std::vector< XfIdRowRange > maXfIdRowRangeList; /// row ranges with style ids }; } // namespace oox::xls diff --git a/sc/source/filter/oox/sheetdatabuffer.cxx b/sc/source/filter/oox/sheetdatabuffer.cxx index e9286d5f1523..5fa7f268e43a 100644 --- a/sc/source/filter/oox/sheetdatabuffer.cxx +++ b/sc/source/filter/oox/sheetdatabuffer.cxx @@ -281,15 +281,14 @@ void SheetDataBuffer::setRowFormat( sal_Int32 nRow, sal_Int32 nXfId, bool bCusto // try to expand cached row range, if formatting is equal if( (maXfIdRowRange.maRowRange.mnLast < 0) || !maXfIdRowRange.tryExpand( nRow, nXfId ) ) { - - maXfIdRowRangeList[ maXfIdRowRange.mnXfId ].push_back( maXfIdRowRange.maRowRange ); + maXfIdRowRangeList.push_back( maXfIdRowRange ); maXfIdRowRange.set( nRow, nXfId ); } } else if( maXfIdRowRange.maRowRange.mnLast >= 0 ) { // finish last cached row range - maXfIdRowRangeList[ maXfIdRowRange.mnXfId ].push_back( maXfIdRowRange.maRowRange ); + maXfIdRowRangeList.push_back( maXfIdRowRange ); maXfIdRowRange.set( -1, -1 ); } } @@ -376,75 +375,83 @@ void SheetDataBuffer::addColXfStyleProcessRowRanges() // count the number of row-range-styles we have AddressConverter& rAddrConv = getAddressConverter(); int cnt = 0; - for ( const auto& [nXfId, rRowRangeList] : maXfIdRowRangeList ) + for ( const auto& rRange : maXfIdRowRangeList ) { - if ( nXfId == -1 ) // it's a dud skip it + if ( rRange.mnXfId == -1 ) // it's a dud skip it continue; - cnt += rRowRangeList.size(); - } - // pre-allocate space in the sorted_vector - for ( sal_Int32 nCol = 0; nCol <= rAddrConv.getMaxApiAddress().Col(); ++nCol ) - { - RowStyles& rRowStyles = maStylesPerColumn[ nCol ]; - rRowStyles.reserve(rRowStyles.size() + cnt); + ++cnt; } + // sort the row ranges, so we spend less time moving data around + // when we insert into aStyleRows + std::sort(maXfIdRowRangeList.begin(), maXfIdRowRangeList.end(), + [](const XfIdRowRange& lhs, const XfIdRowRange& rhs) + { + return lhs.maRowRange.mnFirst < rhs.maRowRange.mnFirst; + }); const auto nMaxCol = rAddrConv.getMaxApiAddress().Col(); for ( sal_Int32 nCol = 0; nCol <= nMaxCol; ++nCol ) { RowStyles& rRowStyles = maStylesPerColumn[ nCol ]; - for ( auto& [nXfId, rRowRangeList] : maXfIdRowRangeList ) + TmpRowStyles aTempRowStyles; + aTempRowStyles.reserve(rRowStyles.size() + cnt); + RowStyles::const_iterator rows_it = rRowStyles.begin(); + // get all row ranges for id + for ( const auto& rRange : maXfIdRowRangeList ) { - if ( nXfId == -1 ) // it's a dud skip it + if ( rRange.mnXfId == -1 ) // it's a dud skip it continue; - // sort the row ranges, so we spend less time moving data around - // when we insert into aStyleRows - std::sort(rRowRangeList.begin(), rRowRangeList.end(), - [](const ValueRange& lhs, const ValueRange& rhs) - { - return lhs.mnFirst < rhs.mnFirst; - }); - // get all row ranges for id - for ( const auto& rRange : rRowRangeList ) + RowRangeStyle aStyleRows; + aStyleRows.mnNumFmt.first = rRange.mnXfId; + aStyleRows.mnNumFmt.second = -1; + + // Reset row range for each column + aStyleRows.mnStartRow = rRange.maRowRange.mnFirst; + aStyleRows.mnEndRow = rRange.maRowRange.mnLast; + + // If aStyleRows includes rows already allocated to a style + // in rRowStyles, then we need to split it into parts. + // ( to occupy only rows that have no style definition) + + // Start iterating at the first element that is not completely before aStyleRows + while (rows_it != rRowStyles.end() && rows_it->mnEndRow < aStyleRows.mnStartRow) { - RowRangeStyle aStyleRows; - aStyleRows.mnNumFmt.first = nXfId; - aStyleRows.mnNumFmt.second = -1; - - // Reset row range for each column - aStyleRows.mnStartRow = rRange.mnFirst; - aStyleRows.mnEndRow = rRange.mnLast; - - // If aStyleRows includes rows already allocated to a style - // in rRowStyles, then we need to split it into parts. - // ( to occupy only rows that have no style definition) - - // Start iterating at the first element that is not completely before aStyleRows - RowStyles::const_iterator rows_it = rRowStyles.lower_bound(aStyleRows); - bool bAddRange = true; - for ( ; rows_it != rRowStyles.end(); ++rows_it ) + aTempRowStyles.push_back(*rows_it); + ++rows_it; + } + bool bAddRange = true; + while ( rows_it != rRowStyles.end() ) + { + // Add the part of aStyleRows that does not overlap with r + if ( aStyleRows.mnStartRow < rows_it->mnStartRow ) + { + RowRangeStyle aSplit = aStyleRows; + aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, rows_it->mnStartRow - 1); + aTempRowStyles.push_back(aSplit); + } + + // Done if no part of aStyleRows extends beyond r + if ( aStyleRows.mnEndRow <= rows_it->mnEndRow ) { - // Add the part of aStyleRows that does not overlap with r - if ( aStyleRows.mnStartRow < rows_it->mnStartRow ) - { - RowRangeStyle aSplit = aStyleRows; - aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, rows_it->mnStartRow - 1); - rows_it = rRowStyles.insert( aSplit ).first; - } - - // Done if no part of aStyleRows extends beyond r - if ( aStyleRows.mnEndRow <= rows_it->mnEndRow ) - { - bAddRange = false; - break; - } - - // Cut off the part aStyleRows that was handled above - aStyleRows.mnStartRow = rows_it->mnEndRow + 1; + bAddRange = false; + break; } - if ( bAddRange ) - rRowStyles.insert( aStyleRows ); + + // Cut off the part aStyleRows that was handled above + aStyleRows.mnStartRow = rows_it->mnEndRow + 1; + + aTempRowStyles.push_back(*rows_it); + ++rows_it; } + if ( bAddRange ) + aTempRowStyles.push_back( aStyleRows ); + } + while ( rows_it != rRowStyles.end() ) + { + aTempRowStyles.push_back(*rows_it); + ++rows_it; } + rRowStyles.clear(); + rRowStyles.insert_sorted_unique_vector(std::move(aTempRowStyles)); } } @@ -472,7 +479,7 @@ void SheetDataBuffer::finalizeImport() finalizeTableOperation( rRange, rModel ); // write default formatting of remaining row range - maXfIdRowRangeList[ maXfIdRowRange.mnXfId ].push_back( maXfIdRowRange.maRowRange ); + maXfIdRowRangeList.push_back( maXfIdRowRange ); addColXfStyles(); commit 0a6d9fe495c5ed77fa8a56eb7f070d43ebaa725d Author: Noel Grandin <[email protected]> AuthorDate: Fri May 9 20:00:36 2025 +0200 Commit: Noel Grandin <[email protected]> CommitDate: Sat May 10 08:37:19 2025 +0200 reduce some data copying Change-Id: I9f6bb99df4505dd0c5bbfd8b40d9642864a9fb07 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/185100 Tested-by: Jenkins Reviewed-by: Noel Grandin <[email protected]> diff --git a/sc/source/filter/oox/stylesbuffer.cxx b/sc/source/filter/oox/stylesbuffer.cxx index c3af8de44034..72ed6cd59882 100644 --- a/sc/source/filter/oox/stylesbuffer.cxx +++ b/sc/source/filter/oox/stylesbuffer.cxx @@ -2272,10 +2272,10 @@ void Xf::applyPatternToAttrList( AttrList& rAttrs, SCROW nRow1, SCROW nRow2, sal if (!pCachedPattern) rCache.add(nXfId, nNumFmtId, const_cast<ScPatternAttr*>(aEntry.getScPatternAttr())); - rAttrs.maAttrs.push_back(aEntry); - if (!rDocImport.isLatinScript(*aEntry.getScPatternAttr())) rAttrs.mbLatinNumFmtOnly = false; + + rAttrs.maAttrs.push_back(std::move(aEntry)); } void Xf::writeToDoc( ScDocumentImport& rDoc, const ScRange& rRange )
