sc/source/filter/inc/sheetdatabuffer.hxx |    6 -
 sc/source/filter/oox/sheetdatabuffer.cxx |  153 +++++++++++++++++--------------
 2 files changed, 90 insertions(+), 69 deletions(-)

New commits:
commit deac5c84732c3491a0ef5bf7f8c1552e6def4fc0
Author:     Noel Grandin <n...@peralex.com>
AuthorDate: Fri Apr 30 18:24:12 2021 +0200
Commit:     Noel Grandin <noel.gran...@collabora.co.uk>
CommitDate: Fri Apr 30 21:39:51 2021 +0200

    tdf#79049 speed up OOXML workbook load (2)
    
    The allocation of memory and pointer chasing were
    slowing things down in the styles conversion.
    So switch to a more cache-dense data structure,
    and re-arrange the loops to be more cache-friendly.
    
    This takes the time from 1m9 to 40s for me.
    
    Change-Id: I876580adc7823313b0cdb067f2f6b3e61c39ecf8
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/114941
    Tested-by: Jenkins
    Reviewed-by: Noel Grandin <noel.gran...@collabora.co.uk>

diff --git a/sc/source/filter/inc/sheetdatabuffer.hxx 
b/sc/source/filter/inc/sheetdatabuffer.hxx
index c49e33280a10..17add16e6234 100644
--- a/sc/source/filter/inc/sheetdatabuffer.hxx
+++ b/sc/source/filter/inc/sheetdatabuffer.hxx
@@ -22,6 +22,7 @@
 #include <vector>
 #include <map>
 #include <set>
+#include <o3tl/sorted_vector.hxx>
 
 #include "richstring.hxx"
 #include "worksheethelper.hxx"
@@ -165,7 +166,8 @@ private:
 
     /** Writes all cell formatting attributes to the passed cell range list. 
(depreciates writeXfIdRangeProperties) */
     void                applyCellMerging( const ScRange& rRange );
-    void                addColXfStyle( sal_Int32 nXfId, sal_Int32 nFormatId, 
const ScRange& rAddress, bool bProcessRowRange = false );
+    void                addColXfStyles();
+    void                addColXfStyleProcessRowRanges();
 private:
     /** Stores cell range address and formula token array of an array formula. 
*/
     typedef std::pair< ScRange, ApiTokenSequence > ArrayFormula;
@@ -200,7 +202,7 @@ private:
             return lhs.mnEndRow<rhs.mnStartRow;
         }
     };
-    typedef ::std::set< RowRangeStyle, StyleRowRangeComp > RowStyles;
+    typedef ::o3tl::sorted_vector< RowRangeStyle, StyleRowRangeComp > 
RowStyles;
     typedef ::std::map< sal_Int32, RowStyles > ColStyles;
     /** Stores information about a merged cell range. */
     struct MergedRange
diff --git a/sc/source/filter/oox/sheetdatabuffer.cxx 
b/sc/source/filter/oox/sheetdatabuffer.cxx
index de1d2c76f3c9..c9c688c2ca17 100644
--- a/sc/source/filter/oox/sheetdatabuffer.cxx
+++ b/sc/source/filter/oox/sheetdatabuffer.cxx
@@ -346,57 +346,99 @@ static void addIfNotInMyMap( const StylesBuffer& rStyles, 
std::map< FormatKeyPai
     rMap[ FormatKeyPair( nXfId, nFormatId ) ] = rRangeList;
 }
 
-void SheetDataBuffer::addColXfStyle( sal_Int32 nXfId, sal_Int32 nFormatId, 
const ScRange& rAddress, bool bProcessRowRange )
-{
-    RowRangeStyle aStyleRows;
-    aStyleRows.mnNumFmt.first = nXfId;
-    aStyleRows.mnNumFmt.second = nFormatId;
-    aStyleRows.mnStartRow = rAddress.aStart.Row();
-    aStyleRows.mnEndRow = rAddress.aEnd.Row();
-    for ( sal_Int32 nCol = rAddress.aStart.Col(); nCol <= rAddress.aEnd.Col(); 
++nCol )
+void SheetDataBuffer::addColXfStyles()
+{
+    std::map< FormatKeyPair, ScRangeList > rangeStyleListMap;
+    for( const auto& [rFormatKeyPair, rRangeList] : maXfIdRangeLists )
+    {
+        addIfNotInMyMap( getStyles(), rangeStyleListMap, rFormatKeyPair.first, 
rFormatKeyPair.second, rRangeList );
+    }
+    // gather all ranges that have the same style and apply them in bulk
+    for ( const auto& [rFormatKeyPair, rRanges] : rangeStyleListMap )
     {
-        if ( !bProcessRowRange )
-            maStylesPerColumn[ nCol ].insert( aStyleRows );
-        else
+        for (const ScRange & rAddress : rRanges)
         {
-            RowStyles& rRowStyles = maStylesPerColumn[ nCol ];
-            // Reset row range for each column
+            RowRangeStyle aStyleRows;
+            aStyleRows.mnNumFmt.first = rFormatKeyPair.first;
+            aStyleRows.mnNumFmt.second = rFormatKeyPair.second;
             aStyleRows.mnStartRow = rAddress.aStart.Row();
             aStyleRows.mnEndRow = rAddress.aEnd.Row();
+            for ( sal_Int32 nCol = rAddress.aStart.Col(); nCol <= 
rAddress.aEnd.Col(); ++nCol )
+               maStylesPerColumn[ nCol ].insert( aStyleRows );
+        }
+    }
+}
 
-            // If aStyleRows includes rows already allocated to a style
-            // in rRowStyles, then we need to split it into parts.
-            // ( to occupy only rows that have no style definition)
-
-            // Start iterating at the first element that is not completely 
before aStyleRows
-            RowStyles::iterator rows_it = rRowStyles.lower_bound(aStyleRows);
-            RowStyles::iterator rows_end = rRowStyles.end();
-            bool bAddRange = true;
-            for ( ; rows_it != rows_end; ++rows_it )
+void SheetDataBuffer::addColXfStyleProcessRowRanges()
+{
+    // count the number of row-range-styles we have
+    AddressConverter& rAddrConv = getAddressConverter();
+    int cnt = 0;
+    for ( const auto& [nXfId, rRowRangeList] : maXfIdRowRangeList )
+    {
+        if ( nXfId == -1 ) // it's a dud skip it
+            continue;
+        cnt += rRowRangeList.size();
+    }
+    // pre-allocate space in the sorted_vector
+    for ( sal_Int32 nCol = 0; nCol <= rAddrConv.getMaxApiAddress().Col(); 
++nCol )
+    {
+       RowStyles& rRowStyles = maStylesPerColumn[ nCol ];
+       rRowStyles.reserve(rRowStyles.size() + cnt);
+    }
+    const auto nMaxCol = rAddrConv.getMaxApiAddress().Col();
+    for ( sal_Int32 nCol = 0; nCol <= nMaxCol; ++nCol )
+    {
+        RowStyles& rRowStyles = maStylesPerColumn[ nCol ];
+        for ( const auto& [nXfId, rRowRangeList] : maXfIdRowRangeList )
+        {
+            if ( nXfId == -1 ) // it's a dud skip it
+                continue;
+            // get all row ranges for id
+            for ( const auto& rRange : rRowRangeList )
             {
-                const RowRangeStyle& r = *rows_it;
-
-                // Add the part of aStyleRows that does not overlap with r
-                if ( aStyleRows.mnStartRow < r.mnStartRow )
+                RowRangeStyle aStyleRows;
+                aStyleRows.mnNumFmt.first = nXfId;
+                aStyleRows.mnNumFmt.second = -1;
+                aStyleRows.mnStartRow = rRange.mnFirst;
+                aStyleRows.mnEndRow = rRange.mnLast;
+
+                // Reset row range for each column
+                aStyleRows.mnStartRow = rRange.mnFirst;
+                aStyleRows.mnEndRow = rRange.mnLast;
+
+                // If aStyleRows includes rows already allocated to a style
+                // in rRowStyles, then we need to split it into parts.
+                // ( to occupy only rows that have no style definition)
+
+                // Start iterating at the first element that is not completely 
before aStyleRows
+                RowStyles::const_iterator rows_it = 
rRowStyles.lower_bound(aStyleRows);
+                bool bAddRange = true;
+                for ( ; rows_it != rRowStyles.end(); ++rows_it )
                 {
-                    RowRangeStyle aSplit = aStyleRows;
-                    aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, 
r.mnStartRow - 1);
-                    // Insert with hint that aSplit comes directly before the 
current position
-                    rRowStyles.insert( rows_it, aSplit );
+                    const RowRangeStyle& r = *rows_it;
+
+                    // Add the part of aStyleRows that does not overlap with r
+                    if ( aStyleRows.mnStartRow < r.mnStartRow )
+                    {
+                        RowRangeStyle aSplit = aStyleRows;
+                        aSplit.mnEndRow = std::min(aStyleRows.mnEndRow, 
r.mnStartRow - 1);
+                        rows_it = rRowStyles.insert( aSplit ).first;
+                    }
+
+                    // Done if no part of aStyleRows extends beyond r
+                    if ( aStyleRows.mnEndRow <= r.mnEndRow )
+                    {
+                        bAddRange = false;
+                        break;
+                    }
+
+                    // Cut off the part aStyleRows that was handled above
+                    aStyleRows.mnStartRow = r.mnEndRow + 1;
                 }
-
-                // Done if no part of aStyleRows extends beyond r
-                if ( aStyleRows.mnEndRow <= r.mnEndRow )
-                {
-                    bAddRange = false;
-                    break;
-                }
-
-                // Cut off the part aStyleRows that was handled above
-                aStyleRows.mnStartRow = r.mnEndRow + 1;
+                if ( bAddRange )
+                    rRowStyles.insert( aStyleRows );
             }
-            if ( bAddRange )
-                rRowStyles.insert( aStyleRows );
         }
     }
 }
@@ -414,32 +456,9 @@ void SheetDataBuffer::finalizeImport()
     // write default formatting of remaining row range
     maXfIdRowRangeList[ maXfIdRowRange.mnXfId ].push_back( 
maXfIdRowRange.maRowRange );
 
-    std::map< FormatKeyPair, ScRangeList > rangeStyleListMap;
-    for( const auto& [rFormatKeyPair, rRangeList] : maXfIdRangeLists )
-    {
-        addIfNotInMyMap( getStyles(), rangeStyleListMap, rFormatKeyPair.first, 
rFormatKeyPair.second, rRangeList );
-    }
-    // gather all ranges that have the same style and apply them in bulk
-    for ( const auto& [rFormatKeyPair, rRanges] : rangeStyleListMap )
-    {
-        for (size_t i = 0, nSize = rRanges.size(); i < nSize; ++i)
-            addColXfStyle( rFormatKeyPair.first, rFormatKeyPair.second, 
rRanges[i]);
-    }
-
-    for ( const auto& [rXfId, rRowRangeList] : maXfIdRowRangeList )
-    {
-        if ( rXfId == -1 ) // it's a dud skip it
-            continue;
-        AddressConverter& rAddrConv = getAddressConverter();
-        // get all row ranges for id
-        for ( const auto& rRange : rRowRangeList )
-        {
-            ScRange aRange( 0, rRange.mnFirst, getSheetIndex(),
-                            rAddrConv.getMaxApiAddress().Col(), rRange.mnLast, 
getSheetIndex() );
+    addColXfStyles();
 
-            addColXfStyle( rXfId, -1, aRange, true );
-        }
-    }
+    addColXfStyleProcessRowRanges();
 
     ScDocumentImport& rDocImport = getDocImport();
     ScDocument& rDoc = rDocImport.getDoc();
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits

Reply via email to