i18nlangtag/source/languagetag/languagetag.cxx | 71 ++++++++++++++++ i18npool/source/breakiterator/breakiterator_unicode.cxx | 10 +- include/i18nlangtag/languagetag.hxx | 3 3 files changed, 80 insertions(+), 4 deletions(-)
New commits: commit fba849c048ce41a3e726778b7a088a47faf84b29 Author: Noel Grandin <[email protected]> AuthorDate: Tue Jan 27 13:29:55 2026 +0200 Commit: Noel Grandin <[email protected]> CommitDate: Tue Jan 27 17:15:52 2026 +0100 tdf#148218 reduce OUString allocations we allocate an awful lot of temporary OUString while looking up translation strings. Add some API to LanguageTag to reduce this by using a common OStringBuffer. Reduces peak memory usage from 420M to 283M Change-Id: Ie55267631f2178e3a6590eebf5876c963a4cda69 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/198197 Tested-by: Jenkins Reviewed-by: Noel Grandin <[email protected]> diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx index 2b301efdb4e1..3836f923785c 100644 --- a/i18nlangtag/source/languagetag/languagetag.cxx +++ b/i18nlangtag/source/languagetag/languagetag.cxx @@ -394,6 +394,7 @@ private: /** Convert Locale to BCP 47 string without resolving system and creating temporary LanguageTag instances. */ static OUString convertToBcp47( const css::lang::Locale& rLocale ); + static void convertToBcp47( OStringBuffer& rBuf, const css::lang::Locale& rLocale ); }; @@ -2871,6 +2872,49 @@ OUString LanguageTagImpl::convertToBcp47( const css::lang::Locale& rLocale ) return aBcp47; } +/** + We know the strings here are always ascii, so we can avoid some allocation work. 
+*/
+static void appendAscii(OStringBuffer& rBuf, const OUString& s)
+{
+    const sal_Int32 nOldLen = rBuf.getLength();
+    rBuf.setLength(nOldLen + s.getLength());
+    // Write behind the nOldLen chars already in rBuf so earlier appends are not overwritten.
+    char* pDest = rBuf.getMutableStr() + nOldLen;
+    const sal_Unicode* pSrc = s.getStr();
+    for (sal_Int32 i = 0; i < s.getLength(); ++i)
+    {
+        assert(pSrc[i] < 0x80); // only 7-bit ASCII narrows to char without loss
+        pDest[i] = static_cast<char>(pSrc[i]);
+    }
+}
+
+// static: appends the BCP 47 form of rLocale to rBuf; existing content of rBuf is kept
+void LanguageTagImpl::convertToBcp47( OStringBuffer& rBuf, const css::lang::Locale& rLocale )
+{
+    if (rLocale.Language.isEmpty())
+    {
+        // nothing to append, rBuf is left untouched
+    }
+    else if (rLocale.Language == I18NLANGTAG_QLT)
+    {
+        appendAscii(rBuf, rLocale.Variant); // for I18NLANGTAG_QLT the Variant field is appended as the tag
+    }
+    else
+    {
+        /* XXX NOTE: most legacy code never evaluated the Variant field, so for
+         * now just concatenate language and country. In case we stumbled over
+         * variant aware code we'd have to take care of that. */
+        if (rLocale.Country.isEmpty())
+            appendAscii(rBuf, rLocale.Language);
+        else
+        {
+            appendAscii(rBuf, rLocale.Language);
+            rBuf.append("-");
+            appendAscii(rBuf, rLocale.Country);
+        }
+    }
+}
 
 // static
 OUString LanguageTag::convertToBcp47( const css::lang::Locale& rLocale, bool bResolveSystem )
@@ -2889,6 +2933,22 @@ OUString LanguageTag::convertToBcp47( const css::lang::Locale& rLocale, bool bRe
     return aBcp47;
 }
 
+// static: appends rLocale as BCP 47 to rBuf; an empty Language is resolved via LANGUAGE_SYSTEM only if bResolveSystem
+void LanguageTag::convertToBcp47( OStringBuffer& rBuf, const css::lang::Locale& rLocale, bool bResolveSystem )
+{
+    if (rLocale.Language.isEmpty())
+    {
+        if (bResolveSystem)
+            LanguageTag::convertToBcp47( rBuf, LANGUAGE_SYSTEM );
+        // else nothing is appended, rBuf is left untouched
+    }
+    else
+    {
+        LanguageTagImpl::convertToBcp47( rBuf, rLocale);
+    }
+}
+
+
 
 // static
 OUString LanguageTag::convertToBcp47( LanguageType nLangID )
@@ -2902,6 +2962,17 @@ OUString LanguageTag::convertToBcp47( LanguageType nLangID )
     return LanguageTagImpl::convertToBcp47( aLocale);
 }
 
+// static
+void LanguageTag::convertToBcp47( OStringBuffer& rBuf, LanguageType nLangID )
+{
+    lang::Locale aLocale( LanguageTag::convertToLocale( nLangID ));
+    // If system for some reason (should not
happen... haha) could not be + // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that + // would recurse into this method here! + if (aLocale.Language.isEmpty()) + return; // bad luck, bail out + LanguageTagImpl::convertToBcp47(rBuf, aLocale); +} // static css::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool bResolveSystem ) diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx index 4e5df75d2701..6c3d3e0ae37b 100644 --- a/i18npool/source/breakiterator/breakiterator_unicode.cxx +++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx @@ -118,9 +118,10 @@ void BreakIterator_Unicode::loadICUBreakIterator(const css::lang::Locale& rLocal // expensive numeric conversion in append() for faster construction of the // always used global key. assert( 0 <= breakType && breakType <= 9 && 0 <= rBreakType && rBreakType <= 9 && 0 <= nWordType && nWordType <= 9); - const OString aLangtagStr( LanguageTag::convertToBcp47( rLocale).toUtf8()); OStringBuffer aKeyBuf(64); - aKeyBuf.append( aLangtagStr + ";" ); + LanguageTag::convertToBcp47(aKeyBuf, rLocale); + const sal_Int32 nLangStrLen = aKeyBuf.getLength(); + aKeyBuf.append( ";" ); if (rule) aKeyBuf.append(rule); aKeyBuf.append(";" + OStringChar(static_cast<char>('0'+breakType)) + ";" @@ -128,6 +129,7 @@ void BreakIterator_Unicode::loadICUBreakIterator(const css::lang::Locale& rLocal + OStringChar( static_cast<char>('0'+nWordType))); // langtag;rule;breakType;rBreakType;nWordType const OString aBIMapGlobalKey( aKeyBuf.makeStringAndClear()); + std::string_view aKeyBufView = std::string_view(aBIMapGlobalKey).substr(0, nLangStrLen); if (icuBI->maBIMapKey != aBIMapGlobalKey || !icuBI->mpValue || !icuBI->mpValue->mpBreakIterator) { @@ -154,7 +156,7 @@ void BreakIterator_Unicode::loadICUBreakIterator(const css::lang::Locale& rLocal if (breakRules.getLength() > breakType && !breakRules[breakType].isEmpty()) { // 
langtag;rule;breakType - const OString aBIMapRuleTypeKey( aLangtagStr + ";" + rule + ";" + OString::number(breakType)); + const OString aBIMapRuleTypeKey( OString::Concat(aKeyBufView) + ";" + rule + ";" + OString::number(breakType)); aMapIt = theBIMap.find( aBIMapRuleTypeKey); bInMap = (aMapIt != theBIMap.end()); if (bInMap) @@ -243,7 +245,7 @@ void BreakIterator_Unicode::loadICUBreakIterator(const css::lang::Locale& rLocal do { // langtag;;;rBreakType (empty rule; empty breakType) - const OString aBIMapLocaleTypeKey( aLangtagStr + ";;;" + OString::number(rBreakType)); + const OString aBIMapLocaleTypeKey( OString::Concat(aKeyBufView) + ";;;" + OString::number(rBreakType)); aMapIt = theBIMap.find( aBIMapLocaleTypeKey); bInMap = (aMapIt != theBIMap.end()); if (bInMap) diff --git a/include/i18nlangtag/languagetag.hxx b/include/i18nlangtag/languagetag.hxx index fc77c5e285d4..b8411f7528fa 100644 --- a/include/i18nlangtag/languagetag.hxx +++ b/include/i18nlangtag/languagetag.hxx @@ -13,6 +13,7 @@ #include <sal/config.h> #include <rtl/locale.h> #include <rtl/ustring.hxx> +#include <rtl/strbuf.hxx> #include <com/sun/star/lang/Locale.hpp> #include <i18nlangtag/i18nlangtagdllapi.h> #include <i18nlangtag/lang.h> @@ -446,6 +447,7 @@ public: locale to the real locale used. */ static OUString convertToBcp47( LanguageType nLangID ); + static void convertToBcp47( OStringBuffer& rBuf, LanguageType nLangID ); /** Convert Locale to BCP 47 string. @@ -455,6 +457,7 @@ public: If FALSE, return an empty OUString for such a tag. */ static OUString convertToBcp47( const css::lang::Locale& rLocale, bool bResolveSystem = true ); + static void convertToBcp47( OStringBuffer& rBuf, const css::lang::Locale& rLocale, bool bResolveSystem = true ); /** Convert BCP 47 string to Locale, convenience method.
