sc/source/filter/inc/sheetdatabuffer.hxx |    2 
 sc/source/filter/oox/sheetdatabuffer.cxx |  125 ++++++++++++++++---------------
 sc/source/filter/oox/stylesbuffer.cxx    |    4 
 3 files changed, 69 insertions(+), 62 deletions(-)

New commits:
commit 0d62ab61e982a0967386e40f21829fc95870b969
Author:     Noel Grandin <[email protected]>
AuthorDate: Fri May 9 19:51:52 2025 +0200
Commit:     Noel Grandin <[email protected]>
CommitDate: Sat May 10 08:37:31 2025 +0200

    tdf#163015 FILEOPEN: Opening specific XLSX file takes long time
    
    We spend most of our time inserting into the output sorted_vector,
    which ends up needing a lot of data movement when the output
    vector becomes large.
    Re-arrange the data structures so we can do this in one pass,
    and instead of inserting into the output vector, we can just
    append to it.
    
    Change-Id: If729e8ca1f2e6b5cf546c17331f1f3716674f2cb
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/185099
    Reviewed-by: Noel Grandin <[email protected]>
    Tested-by: Jenkins

diff --git a/sc/source/filter/inc/sheetdatabuffer.hxx 
b/sc/source/filter/inc/sheetdatabuffer.hxx
index 538631330082..cf3dd2eeed7b 100644
--- a/sc/source/filter/inc/sheetdatabuffer.hxx
+++ b/sc/source/filter/inc/sheetdatabuffer.hxx
@@ -217,7 +217,7 @@ private:
     MergedRangeVector   maMergedRanges;         /// Merged cell ranges.
     MergedRangeVector   maCenterFillRanges;     /// Merged cell ranges from 
'center across' or 'fill' alignment.
     bool                mbPendingSharedFmla;    /// True = maSharedFmlaAddr 
and maSharedBaseAddr are valid.
-    std::map< sal_Int32, std::vector< ValueRange > > maXfIdRowRangeList; /// 
Cached XF identifiers for a ranges of rows, we try and process rowranges with 
the same XF id together
+    std::vector< XfIdRowRange > maXfIdRowRangeList; /// row ranges with style 
ids
 };
 
 } // namespace oox::xls
diff --git a/sc/source/filter/oox/sheetdatabuffer.cxx 
b/sc/source/filter/oox/sheetdatabuffer.cxx
index e9286d5f1523..5fa7f268e43a 100644
--- a/sc/source/filter/oox/sheetdatabuffer.cxx
+++ b/sc/source/filter/oox/sheetdatabuffer.cxx
@@ -281,15 +281,14 @@ void SheetDataBuffer::setRowFormat( sal_Int32 nRow, 
sal_Int32 nXfId, bool bCusto
         // try to expand cached row range, if formatting is equal
         if( (maXfIdRowRange.maRowRange.mnLast < 0) || 
!maXfIdRowRange.tryExpand( nRow, nXfId ) )
         {
-
-            maXfIdRowRangeList[ maXfIdRowRange.mnXfId ].push_back( 
maXfIdRowRange.maRowRange );
+            maXfIdRowRangeList.push_back( maXfIdRowRange );
             maXfIdRowRange.set( nRow, nXfId );
         }
     }
     else if( maXfIdRowRange.maRowRange.mnLast >= 0 )
     {
         // finish last cached row range
-        maXfIdRowRangeList[ maXfIdRowRange.mnXfId ].push_back( 
maXfIdRowRange.maRowRange );
+        maXfIdRowRangeList.push_back( maXfIdRowRange );
         maXfIdRowRange.set( -1, -1 );
     }
 }
@@ -376,75 +375,83 @@ void SheetDataBuffer::addColXfStyleProcessRowRanges()
     // count the number of row-range-styles we have
     AddressConverter& rAddrConv = getAddressConverter();
     int cnt = 0;
-    for ( const auto& [nXfId, rRowRangeList] : maXfIdRowRangeList )
+    for ( const auto& rRange : maXfIdRowRangeList )
     {
-        if ( nXfId == -1 ) // it's a dud skip it
+        if ( rRange.mnXfId == -1 ) // it's a dud skip it
             continue;
-        cnt += rRowRangeList.size();
-    }
-    // pre-allocate space in the sorted_vector
-    for ( sal_Int32 nCol = 0; nCol <= rAddrConv.getMaxApiAddress().Col(); 
++nCol )
-    {
-       RowStyles& rRowStyles = maStylesPerColumn[ nCol ];
-       rRowStyles.reserve(rRowStyles.size() + cnt);
+        ++cnt;
     }
+    // sort the row ranges, so we spend less time moving data around
+    // when we insert into aStyleRows
+    std::sort(maXfIdRowRangeList.begin(), maXfIdRowRangeList.end(),
+        [](const XfIdRowRange& lhs, const XfIdRowRange& rhs)
+        {
+            return lhs.maRowRange.mnFirst < rhs.maRowRange.mnFirst;
+        });
     const auto nMaxCol = rAddrConv.getMaxApiAddress().Col();
     for ( sal_Int32 nCol = 0; nCol <= nMaxCol; ++nCol )
     {
         RowStyles& rRowStyles = maStylesPerColumn[ nCol ];
-        for ( auto& [nXfId, rRowRangeList] : maXfIdRowRangeList )
+        TmpRowStyles aTempRowStyles;
+        aTempRowStyles.reserve(rRowStyles.size() + cnt);
+        RowStyles::const_iterator rows_it = rRowStyles.begin();
+        // get all row ranges for id
+        for ( const auto& rRange : maXfIdRowRangeList )
         {
-            if ( nXfId == -1 ) // it's a dud skip it
+            if ( rRange.mnXfId == -1 ) // it's a dud skip it
                 continue;
-            // sort the row ranges, so we spend less time moving data around
-            // when we insert into aStyleRows
-            std::sort(rRowRangeList.begin(), rRowRangeList.end(),
-                [](const ValueRange& lhs, const ValueRange& rhs)
-                {
-                    return lhs.mnFirst < rhs.mnFirst;
-                });
-            // get all row ranges for id
-            for ( const auto& rRange : rRowRangeList )
+            RowRangeStyle aStyleRows;
+            aStyleRows.mnNumFmt.first = rRange.mnXfId;
+            aStyleRows.mnNumFmt.second = -1;
+
+            // Reset row range for each column
+            aStyleRows.mnStartRow = rRange.maRowRange.mnFirst;
+            aStyleRows.mnEndRow = rRange.maRowRange.mnLast;
+
+            // If aStyleRows includes rows already allocated to a style
+            // in rRowStyles, then we need to split it into parts.
+            // ( to occupy only rows that have no style definition)
+
+            // Start iterating at the first element that is not completely 
before aStyleRows
+            while (rows_it != rRowStyles.end() && rows_it->mnEndRow < 
aStyleRows.mnStartRow)
             {
-                RowRangeStyle aStyleRows;
-                aStyleRows.mnNumFmt.first = nXfId;
-                aStyleRows.mnNumFmt.second = -1;
-
-                // Reset row range for each column
-                aStyleRows.mnStartRow = rRange.mnFirst;
-                aStyleRows.mnEndRow = rRange.mnLast;
-
-                // If aStyleRows includes rows already allocated to a style
-                // in rRowStyles, then we need to split it into parts.
-                // ( to occupy only rows that have no style definition)
-
-                // Start iterating at the first element that is not completely 
before aStyleRows
-                RowStyles::const_iterator rows_it = 
rRowStyles.lower_bound(aStyleRows);
-                bool bAddRange = true;
-                for ( ; rows_it != rRowStyles.end(); ++rows_it )
+                aTempRowStyles.push_back(*rows_it);
+                ++rows_it;
+            }
+            bool bAddRange = true;
+            while ( rows_it != rRowStyles.end() )
+            {
+                // Add the part of aStyleRows that does not overlap with r
+                if ( aStyleRows.mnStartRow < rows_it->mnStartRow )
+                {
+                    RowRangeStyle aSplit = aStyleRows;
+                    aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, 
rows_it->mnStartRow - 1);
+                    aTempRowStyles.push_back(aSplit);
+                }
+
+                // Done if no part of aStyleRows extends beyond r
+                if ( aStyleRows.mnEndRow <= rows_it->mnEndRow )
                 {
-                    // Add the part of aStyleRows that does not overlap with r
-                    if ( aStyleRows.mnStartRow < rows_it->mnStartRow )
-                    {
-                        RowRangeStyle aSplit = aStyleRows;
-                        aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, 
rows_it->mnStartRow - 1);
-                        rows_it = rRowStyles.insert( aSplit ).first;
-                    }
-
-                    // Done if no part of aStyleRows extends beyond r
-                    if ( aStyleRows.mnEndRow <= rows_it->mnEndRow )
-                    {
-                        bAddRange = false;
-                        break;
-                    }
-
-                    // Cut off the part aStyleRows that was handled above
-                    aStyleRows.mnStartRow = rows_it->mnEndRow + 1;
+                    bAddRange = false;
+                    break;
                 }
-                if ( bAddRange )
-                    rRowStyles.insert( aStyleRows );
+
+                // Cut off the part aStyleRows that was handled above
+                aStyleRows.mnStartRow = rows_it->mnEndRow + 1;
+
+                aTempRowStyles.push_back(*rows_it);
+                ++rows_it;
             }
+            if ( bAddRange )
+                aTempRowStyles.push_back( aStyleRows );
+        }
+        while ( rows_it != rRowStyles.end() )
+        {
+            aTempRowStyles.push_back(*rows_it);
+            ++rows_it;
         }
+        rRowStyles.clear();
+        rRowStyles.insert_sorted_unique_vector(std::move(aTempRowStyles));
     }
 }
 
@@ -472,7 +479,7 @@ void SheetDataBuffer::finalizeImport()
         finalizeTableOperation( rRange, rModel );
 
     // write default formatting of remaining row range
-    maXfIdRowRangeList[ maXfIdRowRange.mnXfId ].push_back( 
maXfIdRowRange.maRowRange );
+    maXfIdRowRangeList.push_back( maXfIdRowRange );
 
     addColXfStyles();
 
commit 0a6d9fe495c5ed77fa8a56eb7f070d43ebaa725d
Author:     Noel Grandin <[email protected]>
AuthorDate: Fri May 9 20:00:36 2025 +0200
Commit:     Noel Grandin <[email protected]>
CommitDate: Sat May 10 08:37:19 2025 +0200

    reduce some data copying
    
    Change-Id: I9f6bb99df4505dd0c5bbfd8b40d9642864a9fb07
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/185100
    Tested-by: Jenkins
    Reviewed-by: Noel Grandin <[email protected]>

diff --git a/sc/source/filter/oox/stylesbuffer.cxx 
b/sc/source/filter/oox/stylesbuffer.cxx
index c3af8de44034..72ed6cd59882 100644
--- a/sc/source/filter/oox/stylesbuffer.cxx
+++ b/sc/source/filter/oox/stylesbuffer.cxx
@@ -2272,10 +2272,10 @@ void Xf::applyPatternToAttrList( AttrList& rAttrs, 
SCROW nRow1, SCROW nRow2, sal
     if (!pCachedPattern)
         rCache.add(nXfId, nNumFmtId, 
const_cast<ScPatternAttr*>(aEntry.getScPatternAttr()));
 
-    rAttrs.maAttrs.push_back(aEntry);
-
     if (!rDocImport.isLatinScript(*aEntry.getScPatternAttr()))
         rAttrs.mbLatinNumFmtOnly = false;
+
+    rAttrs.maAttrs.push_back(std::move(aEntry));
 }
 
 void Xf::writeToDoc( ScDocumentImport& rDoc, const ScRange& rRange )

Reply via email to