i18nlangtag/source/languagetag/languagetag.cxx | 10 ++-- i18npool/inc/breakiterator_unicode.hxx | 14 +++++- i18npool/source/breakiterator/breakiterator_unicode.cxx | 37 +++++++++------- include/i18nlangtag/languagetag.hxx | 33 ++++++++++++-- 4 files changed, 69 insertions(+), 25 deletions(-)
New commits: commit ab1a88da2dd50b2cb067a53fbfae21a6564d6a94 Author: Noel Grandin <[email protected]> AuthorDate: Wed Jan 28 11:22:16 2026 +0200 Commit: Noel Grandin <[email protected]> CommitDate: Thu Jan 29 10:23:10 2026 +0100 tdf#148218 reduce OString alloc in BreakIterator_Unicode::loadICUBreakIterator use a custom StackString object to avoid heap allocation here. Shaves 16% of the temporary allocations. Change-Id: I2628780c9491c0f8b26bbfcbe085d9309c851f55 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/198280 Reviewed-by: Noel Grandin <[email protected]> Tested-by: Jenkins diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx index 3836f923785c..9420c1d387af 100644 --- a/i18nlangtag/source/languagetag/languagetag.cxx +++ b/i18nlangtag/source/languagetag/languagetag.cxx @@ -394,7 +394,7 @@ private: /** Convert Locale to BCP 47 string without resolving system and creating temporary LanguageTag instances. */ static OUString convertToBcp47( const css::lang::Locale& rLocale ); - static void convertToBcp47( OStringBuffer& rBuf, const css::lang::Locale& rLocale ); + static void convertToBcp47( StackString64& rBuf, const css::lang::Locale& rLocale ); }; @@ -2875,7 +2875,7 @@ OUString LanguageTagImpl::convertToBcp47( const css::lang::Locale& rLocale ) /** We know the strings here are always ascii, so we can avoid some allocation work. */ -static void appendAscii(OStringBuffer& rBuf, const OUString& s) +static void appendAscii(StackString64& rBuf, const OUString& s) { rBuf.setLength(rBuf.getLength() + s.getLength()); const sal_Unicode* pSrc = s.getStr(); @@ -2890,7 +2890,7 @@ static void appendAscii(OStringBuffer& rBuf, const OUString& s) } // static -void LanguageTagImpl::convertToBcp47( OStringBuffer& rBuf, const css::lang::Locale& rLocale ) +void LanguageTagImpl::convertToBcp47( StackString64& rBuf, const css::lang::Locale& rLocale ) { if (rLocale.Language.isEmpty()) { @@ -2934,7 +2934,7 @@ OUString LanguageTag::convertToBcp47( const css::lang::Locale& rLocale, bool bRe } // static -void LanguageTag::convertToBcp47( OStringBuffer& rBuf, const css::lang::Locale& rLocale, bool bResolveSystem ) +void LanguageTag::convertToBcp47( StackString64& rBuf, const css::lang::Locale& rLocale, bool bResolveSystem ) { if (rLocale.Language.isEmpty()) { @@ -2963,7 +2963,7 @@ OUString LanguageTag::convertToBcp47( LanguageType nLangID ) } // static -void LanguageTag::convertToBcp47( OStringBuffer& rBuf, LanguageType nLangID ) +void LanguageTag::convertToBcp47( StackString64& rBuf, LanguageType nLangID ) { lang::Locale aLocale( LanguageTag::convertToLocale( nLangID )); // If system for some reason (should not happen... haha) could not be diff --git a/i18npool/inc/breakiterator_unicode.hxx b/i18npool/inc/breakiterator_unicode.hxx index d2cf5339f7d8..3835b35181e9 100644 --- a/i18npool/inc/breakiterator_unicode.hxx +++ b/i18npool/inc/breakiterator_unicode.hxx @@ -101,7 +101,19 @@ protected: sal_Int16 rBreakType, sal_Int16 rWordType, const char* name, const OUString& rText); public: - typedef std::unordered_map< OString, std::shared_ptr< BI_ValueData > > BIMap; + struct StringHash + { + using is_transparent = void; + std::size_t operator()(const OString& v) const + { + return v.hashCode(); + } + std::size_t operator()(std::string_view s) const + { + return rtl_str_hashCode_WithLength(s.data(), s.size()); + } + }; + typedef std::unordered_map< OString, std::shared_ptr< BI_ValueData >, StringHash, std::equal_to<> > BIMap; }; } diff --git a/i18npool/source/breakiterator/breakiterator_unicode.cxx b/i18npool/source/breakiterator/breakiterator_unicode.cxx index 6c3d3e0ae37b..cb885bd34a96 100644 --- a/i18npool/source/breakiterator/breakiterator_unicode.cxx +++ b/i18npool/source/breakiterator/breakiterator_unicode.cxx @@ -118,18 +118,18 @@ void BreakIterator_Unicode::loadICUBreakIterator(const css::lang::Locale& rLocal // expensive numeric conversion in append() for faster construction of the // always used global key. assert( 0 <= breakType && breakType <= 9 && 0 <= rBreakType && rBreakType <= 9 && 0 <= nWordType && nWordType <= 9); - OStringBuffer aKeyBuf(64); + StackString64 aKeyBuf; LanguageTag::convertToBcp47(aKeyBuf, rLocale); const sal_Int32 nLangStrLen = aKeyBuf.getLength(); aKeyBuf.append( ";" ); if (rule) aKeyBuf.append(rule); - aKeyBuf.append(";" + OStringChar(static_cast<char>('0'+breakType)) + ";" - + OStringChar(static_cast<char>('0'+rBreakType)) + ";" - + OStringChar( static_cast<char>('0'+nWordType))); + aKeyBuf.append(";").append(static_cast<char>('0'+breakType)).append(";") + .append(static_cast<char>('0'+rBreakType)).append(";") + .append(static_cast<char>('0'+nWordType)); // langtag;rule;breakType;rBreakType;nWordType - const OString aBIMapGlobalKey( aKeyBuf.makeStringAndClear()); - std::string_view aKeyBufView = std::string_view(aBIMapGlobalKey).substr(0, nLangStrLen); + const std::string_view aBIMapGlobalKey( aKeyBuf.toView() ); + std::string_view aKeyBufView = aBIMapGlobalKey.substr(0, nLangStrLen); if (icuBI->maBIMapKey != aBIMapGlobalKey || !icuBI->mpValue || !icuBI->mpValue->mpBreakIterator) { @@ -161,9 +161,10 @@ void BreakIterator_Unicode::loadICUBreakIterator(const css::lang::Locale& rLocal bInMap = (aMapIt != theBIMap.end()); if (bInMap) { + OString aBIMapGlobalKeyStr(aBIMapGlobalKey); icuBI->mpValue = aMapIt->second; - icuBI->maBIMapKey = aBIMapGlobalKey; - theBIMap.insert( std::make_pair( aBIMapGlobalKey, icuBI->mpValue)); + icuBI->maBIMapKey = aBIMapGlobalKeyStr; + theBIMap.insert( std::make_pair( aBIMapGlobalKeyStr, icuBI->mpValue)); break; // do } @@ -190,9 +191,10 @@ void BreakIterator_Unicode::loadICUBreakIterator(const css::lang::Locale& rLocal bInMap = (aMapIt != theBIMap.end()); if (bInMap) { + OString aBIMapGlobalKeyStr(aBIMapGlobalKey); icuBI->mpValue = aMapIt->second; - icuBI->maBIMapKey = aBIMapGlobalKey; - theBIMap.insert( std::make_pair( aBIMapGlobalKey, icuBI->mpValue)); + icuBI->maBIMapKey = aBIMapGlobalKeyStr; + theBIMap.insert( std::make_pair( aBIMapGlobalKeyStr, icuBI->mpValue)); break; // do } @@ -217,9 +219,10 @@ void BreakIterator_Unicode::loadICUBreakIterator(const css::lang::Locale& rLocal bInMap = (aMapIt != theBIMap.end()); if (bInMap) { + OString aBIMapGlobalKeyStr(aBIMapGlobalKey); icuBI->mpValue = aMapIt->second; - icuBI->maBIMapKey = aBIMapGlobalKey; - theBIMap.insert( std::make_pair( aBIMapGlobalKey, icuBI->mpValue)); + icuBI->maBIMapKey = aBIMapGlobalKeyStr; + theBIMap.insert( std::make_pair( aBIMapGlobalKeyStr, icuBI->mpValue)); break; // do } @@ -250,9 +253,10 @@ void BreakIterator_Unicode::loadICUBreakIterator(const css::lang::Locale& rLocal bInMap = (aMapIt != theBIMap.end()); if (bInMap) { + OString aBIMapGlobalKeyStr(aBIMapGlobalKey); icuBI->mpValue = aMapIt->second; - icuBI->maBIMapKey = aBIMapGlobalKey; - theBIMap.insert( std::make_pair( aBIMapGlobalKey, icuBI->mpValue)); + icuBI->maBIMapKey = aBIMapGlobalKeyStr; + theBIMap.insert( std::make_pair( aBIMapGlobalKeyStr, icuBI->mpValue)); break; // do } @@ -284,9 +288,10 @@ void BreakIterator_Unicode::loadICUBreakIterator(const css::lang::Locale& rLocal if (!icuBI->mpValue || !icuBI->mpValue->mpBreakIterator) { throw uno::RuntimeException(u"ICU BreakIterator is not properly initialized"_ustr); } - icuBI->maBIMapKey = aBIMapGlobalKey; + OString aBIMapGlobalKeyStr(aBIMapGlobalKey); + icuBI->maBIMapKey = aBIMapGlobalKeyStr; if (!bInMap) - theBIMap.insert( std::make_pair( aBIMapGlobalKey, icuBI->mpValue)); + theBIMap.insert( std::make_pair( aBIMapGlobalKeyStr, icuBI->mpValue)); bNewBreak=true; } diff --git a/include/i18nlangtag/languagetag.hxx b/include/i18nlangtag/languagetag.hxx index b8411f7528fa..cad086b15998 100644 --- a/include/i18nlangtag/languagetag.hxx +++ b/include/i18nlangtag/languagetag.hxx @@ -13,7 +13,6 @@ #include <sal/config.h> #include <rtl/locale.h> #include <rtl/ustring.hxx> -#include <rtl/strbuf.hxx> #include <com/sun/star/lang/Locale.hpp> #include <i18nlangtag/i18nlangtagdllapi.h> #include <i18nlangtag/lang.h> @@ -40,6 +39,34 @@ inline constexpr OUString I18NLANGTAG_QLT = u"qlt"_ustr; class LanguageTagImpl; +/** + Small 64-byte length "string" that emulates part of OString's API, and is meant to be + stack-allocated for speed. +*/ +class StackString64 +{ +public: + sal_Int32 getLength() const { return mnLength; } + void setLength(sal_Int32 n) { mnLength = n; } + char* getMutableStr() { return maData; } + StackString64& append(const char *s) + { + int slen = strlen(s); + for (int i=0; i < slen; i++) + maData[mnLength++] = *(s++); + return *this; + } + StackString64& append(char ch) + { + maData[mnLength++] = ch; + return *this; + } + std::string_view toView() const { return std::string_view(maData, mnLength); } +private: + char maData[64]; + sal_Int32 mnLength = 0; +}; + /** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and conversions in between. @@ -447,7 +474,7 @@ public: locale to the real locale used. */ static OUString convertToBcp47( LanguageType nLangID ); - static void convertToBcp47( OStringBuffer& rBuf, LanguageType nLangID ); + static void convertToBcp47( StackString64& rBuf, LanguageType nLangID ); /** Convert Locale to BCP 47 string. @@ -457,7 +484,7 @@ public: If FALSE, return an empty OUString for such a tag. */ static OUString convertToBcp47( const css::lang::Locale& rLocale, bool bResolveSystem = true ); - static void convertToBcp47( OStringBuffer& rBuf, const css::lang::Locale& rLocale, bool bResolveSystem = true ); + static void convertToBcp47( StackString64& rBuf, const css::lang::Locale& rLocale, bool bResolveSystem = true ); /** Convert BCP 47 string to Locale, convenience method.
