i18nlangtag/source/languagetag/languagetag.cxx          |   71 ++++++++++++++++
 i18npool/source/breakiterator/breakiterator_unicode.cxx |   10 +-
 include/i18nlangtag/languagetag.hxx                     |    3 
 3 files changed, 80 insertions(+), 4 deletions(-)

New commits:
commit fba849c048ce41a3e726778b7a088a47faf84b29
Author:     Noel Grandin <[email protected]>
AuthorDate: Tue Jan 27 13:29:55 2026 +0200
Commit:     Noel Grandin <[email protected]>
CommitDate: Tue Jan 27 17:15:52 2026 +0100

    tdf#148218 reduce OUString allocations
    
    We allocate an awful lot of temporary OUStrings while looking up translation strings.
    
    Add some API to LanguageTag to reduce this by using a common OStringBuffer.
    
    Reduces peak memory usage from 420M to 283M
    
    Change-Id: Ie55267631f2178e3a6590eebf5876c963a4cda69
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/198197
    Tested-by: Jenkins
    Reviewed-by: Noel Grandin <[email protected]>

diff --git a/i18nlangtag/source/languagetag/languagetag.cxx 
b/i18nlangtag/source/languagetag/languagetag.cxx
index 2b301efdb4e1..3836f923785c 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -394,6 +394,7 @@ private:
     /** Convert Locale to BCP 47 string without resolving system and creating
         temporary LanguageTag instances. */
     static OUString     convertToBcp47( const css::lang::Locale& rLocale );
+    static void         convertToBcp47( OStringBuffer& rBuf, const 
css::lang::Locale& rLocale );
 
 };
 
@@ -2871,6 +2872,49 @@ OUString LanguageTagImpl::convertToBcp47( const 
css::lang::Locale& rLocale )
     return aBcp47;
 }
 
+/** Append s to rBuf, narrowing each UTF-16 code unit to a char. The strings
+    passed here are always ASCII, which avoids an intermediate OString. */
+static void appendAscii(OStringBuffer& rBuf, const OUString& s)
+{
+    const sal_Int32 nOldLen = rBuf.getLength();
+    const sal_Int32 nAddLen = s.getLength();
+    rBuf.setLength(nOldLen + nAddLen);
+    // Write behind the existing content, not over the start of the buffer.
+    char* pDest = rBuf.getMutableStr() + nOldLen;
+    const sal_Unicode* pSrc = s.getStr();
+    for (sal_Int32 i = 0; i < nAddLen; ++i)
+    {
+        assert(pSrc[i] < 127);
+        pDest[i] = static_cast<char>(pSrc[i]);
+    }
+}
+
+// static
+void LanguageTagImpl::convertToBcp47( OStringBuffer& rBuf, const css::lang::Locale& rLocale )
+{
+    // Without a language there is nothing to emit; rBuf is left untouched.
+    if (rLocale.Language.isEmpty())
+        return;
+
+    // For the I18NLANGTAG_QLT language only the Variant field is appended,
+    // Language and Country are ignored.
+    if (rLocale.Language == I18NLANGTAG_QLT)
+    {
+        appendAscii(rBuf, rLocale.Variant);
+        return;
+    }
+
+    /* XXX NOTE: most legacy code never evaluated the Variant field, so for
+     * now just concatenate language and country. In case we stumbled over
+     * variant aware code we'd have to take care of that. */
+    appendAscii(rBuf, rLocale.Language);
+    // Append "-" and the country only when a country is set.
+    if (!rLocale.Country.isEmpty())
+    {
+        rBuf.append("-");
+        appendAscii(rBuf, rLocale.Country);
+    }
+}
 
 // static
 OUString LanguageTag::convertToBcp47( const css::lang::Locale& rLocale, bool 
bResolveSystem )
@@ -2889,6 +2933,22 @@ OUString LanguageTag::convertToBcp47( const 
css::lang::Locale& rLocale, bool bRe
     return aBcp47;
 }
 
+// static
+void LanguageTag::convertToBcp47( OStringBuffer& rBuf, const css::lang::Locale& rLocale, bool bResolveSystem )
+{
+    // A locale that names a language is converted directly.
+    if (!rLocale.Language.isEmpty())
+    {
+        LanguageTagImpl::convertToBcp47( rBuf, rLocale);
+        return;
+    }
+    // Empty language: append the tag for LANGUAGE_SYSTEM if the caller asked
+    // for resolving, otherwise leave rBuf as it is.
+    if (bResolveSystem)
+        LanguageTag::convertToBcp47( rBuf, LANGUAGE_SYSTEM );
+}
+
+
 
 // static
 OUString LanguageTag::convertToBcp47( LanguageType nLangID )
@@ -2902,6 +2962,17 @@ OUString LanguageTag::convertToBcp47( LanguageType 
nLangID )
     return LanguageTagImpl::convertToBcp47( aLocale);
 }
 
+// static
+void LanguageTag::convertToBcp47( OStringBuffer& rBuf, LanguageType nLangID )
+{
+    const lang::Locale aResolved( LanguageTag::convertToLocale( nLangID ));
+    // Go through LanguageTagImpl only: the LanguageTag overload taking a
+    // Locale calls back into this method for an empty language and would
+    // recurse. If the language could not be resolved (should not happen)
+    // nothing is appended.
+    if (!aResolved.Language.isEmpty())
+        LanguageTagImpl::convertToBcp47(rBuf, aResolved);
+}
 
 // static
 css::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool 
bResolveSystem )
diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx 
b/i18npool/source/breakiterator/breakiterator_unicode.cxx
index 4e5df75d2701..6c3d3e0ae37b 100644
--- a/i18npool/source/breakiterator/breakiterator_unicode.cxx
+++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx
@@ -118,9 +118,10 @@ void BreakIterator_Unicode::loadICUBreakIterator(const 
css::lang::Locale& rLocal
     // expensive numeric conversion in append() for faster construction of the
     // always used global key.
     assert( 0 <= breakType && breakType <= 9 && 0 <= rBreakType && rBreakType 
<= 9 && 0 <= nWordType && nWordType <= 9);
-    const OString aLangtagStr( LanguageTag::convertToBcp47( rLocale).toUtf8());
     OStringBuffer aKeyBuf(64);
-    aKeyBuf.append( aLangtagStr + ";" );
+    LanguageTag::convertToBcp47(aKeyBuf, rLocale);
+    const sal_Int32 nLangStrLen = aKeyBuf.getLength();
+    aKeyBuf.append( ";" );
     if (rule)
         aKeyBuf.append(rule);
     aKeyBuf.append(";" + OStringChar(static_cast<char>('0'+breakType)) + ";"
@@ -128,6 +129,7 @@ void BreakIterator_Unicode::loadICUBreakIterator(const 
css::lang::Locale& rLocal
         + OStringChar( static_cast<char>('0'+nWordType)));
     // langtag;rule;breakType;rBreakType;nWordType
     const OString aBIMapGlobalKey( aKeyBuf.makeStringAndClear());
+    std::string_view aKeyBufView = std::string_view(aBIMapGlobalKey).substr(0, 
nLangStrLen);
 
     if (icuBI->maBIMapKey != aBIMapGlobalKey || !icuBI->mpValue || 
!icuBI->mpValue->mpBreakIterator)
     {
@@ -154,7 +156,7 @@ void BreakIterator_Unicode::loadICUBreakIterator(const 
css::lang::Locale& rLocal
                 if (breakRules.getLength() > breakType && 
!breakRules[breakType].isEmpty())
                 {
                     // langtag;rule;breakType
-                    const OString aBIMapRuleTypeKey( aLangtagStr + ";" + rule 
+ ";" + OString::number(breakType));
+                    const OString aBIMapRuleTypeKey( 
OString::Concat(aKeyBufView) + ";" + rule + ";" + OString::number(breakType));
                     aMapIt = theBIMap.find( aBIMapRuleTypeKey);
                     bInMap = (aMapIt != theBIMap.end());
                     if (bInMap)
@@ -243,7 +245,7 @@ void BreakIterator_Unicode::loadICUBreakIterator(const 
css::lang::Locale& rLocal
             do
             {
                 // langtag;;;rBreakType (empty rule; empty breakType)
-                const OString aBIMapLocaleTypeKey( aLangtagStr + ";;;" + 
OString::number(rBreakType));
+                const OString aBIMapLocaleTypeKey( 
OString::Concat(aKeyBufView) + ";;;" + OString::number(rBreakType));
                 aMapIt = theBIMap.find( aBIMapLocaleTypeKey);
                 bInMap = (aMapIt != theBIMap.end());
                 if (bInMap)
diff --git a/include/i18nlangtag/languagetag.hxx 
b/include/i18nlangtag/languagetag.hxx
index fc77c5e285d4..b8411f7528fa 100644
--- a/include/i18nlangtag/languagetag.hxx
+++ b/include/i18nlangtag/languagetag.hxx
@@ -13,6 +13,7 @@
 #include <sal/config.h>
 #include <rtl/locale.h>
 #include <rtl/ustring.hxx>
+#include <rtl/strbuf.hxx>
 #include <com/sun/star/lang/Locale.hpp>
 #include <i18nlangtag/i18nlangtagdllapi.h>
 #include <i18nlangtag/lang.h>
@@ -446,6 +447,7 @@ public:
        locale to the real locale used.
      */
     static OUString convertToBcp47( LanguageType nLangID );
+    static void convertToBcp47( OStringBuffer& rBuf, LanguageType nLangID );
 
     /** Convert Locale to BCP 47 string.
 
@@ -455,6 +457,7 @@ public:
                If FALSE, return an empty OUString for such a tag.
      */
     static OUString convertToBcp47( const css::lang::Locale& rLocale, bool 
bResolveSystem = true );
+    static void convertToBcp47( OStringBuffer& rBuf, const css::lang::Locale& 
rLocale, bool bResolveSystem = true );
 
     /** Convert BCP 47 string to Locale, convenience method.
 

Reply via email to