editeng/source/misc/acorrcfg.cxx | 16 ++++++++-------- i18nlangtag/qa/cppunit/test_languagetag.cxx | 2 ++ i18nlangtag/source/isolang/isolang.cxx | 2 ++ i18nlangtag/source/isolang/mslangid.cxx | 6 +++--- i18nlangtag/source/languagetag/languagetag.cxx | 6 +++--- include/i18nlangtag/mslangid.hxx | 5 +++-- 6 files changed, 21 insertions(+), 16 deletions(-)
New commits: commit 437abb3abbc506c1e20c6fec8e574abfe3487842 Author: Eike Rathke <er...@redhat.com> AuthorDate: Tue Oct 4 11:14:38 2022 +0200 Commit: Eike Rathke <er...@redhat.com> CommitDate: Tue Oct 4 19:15:45 2022 +0200 Check acor_langtag for language-script fallback instead of only language A tag with script could be added for which we don't have locale data but for the same language in another script. Do not fall back to that. With that the share/autocorr/acor_zh-{CN,TW}.dat files created additional Asian language listbox entries "Chinese (Simplified, China) {zh-Hans-CN}" "Chinese (Traditional, Taiwan) {zh-Hant-TW}" because those are the canonicalized language tags. Prefer the known legacy zh-CN and zh-TW tags instead. Could also have happened with any document import. Which again revealed a flaw in the handling of mapping overrides where converting from a LanguageType LCID to Locale could yield a different mapping than from BCP47 to LCID, which in the case of a fallback for locale data led to odd side effects. Change-Id: I1e2aaa8e9f99b6b3bc2c9a661215cb00bddd33d6 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/140939 Reviewed-by: Eike Rathke <er...@redhat.com> Tested-by: Jenkins diff --git a/editeng/source/misc/acorrcfg.cxx b/editeng/source/misc/acorrcfg.cxx index 5806179bc5f0..53a43cff3f22 100644 --- a/editeng/source/misc/acorrcfg.cxx +++ b/editeng/source/misc/acorrcfg.cxx @@ -75,20 +75,20 @@ static void scanAutoCorrectDirForLanguageTags( const OUString& rURL ) if (SvtLanguageTable::HasLanguageType( aLanguageTag.getLanguageType())) continue; - // Insert language-only tags only if there is no known - // matching fallback locale, otherwise we'd end up with - // unwanted entries where a language autocorrection - // file covers several locales. We do know a few - // art-x-... though so exclude those and any other - // private-use tag (which should not fallback, but - // avoid). 
+ // Insert language(-script)-only tags only if there is + // no known matching fallback locale, otherwise we'd + // end up with unwanted entries where a language + // autocorrection file covers several locales. We do + // know a few art-x-... though so exclude those and any + // other private-use tag (which should not fallback, + // but avoid). if (aLanguageTag.getCountry().isEmpty() && LanguageTag::isValidBcp47( aCanonicalized, nullptr, LanguageTag::PrivateUse::DISALLOW)) { LanguageTag aFallback( aLanguageTag); aFallback.makeFallback(); - if (aFallback.getLanguage() == aLanguageTag.getLanguage()) + if (aFallback.getLanguageAndScript() == aLanguageTag.getLanguageAndScript()) continue; } diff --git a/i18nlangtag/qa/cppunit/test_languagetag.cxx b/i18nlangtag/qa/cppunit/test_languagetag.cxx index 4a211012e80d..f7864d6104fd 100644 --- a/i18nlangtag/qa/cppunit/test_languagetag.cxx +++ b/i18nlangtag/qa/cppunit/test_languagetag.cxx @@ -753,6 +753,8 @@ bool checkMapping( std::u16string_view rStr1, std::u16string_view rStr2 ) if (rStr1 == u"kw-UK" ) return rStr2 == u"kw-GB"; if (rStr1 == u"oc-FR" ) return rStr2 == u"oc-FR-lengadoc"; if (rStr1 == u"oc-ES" ) return rStr2 == u"oc-ES-aranes"; + if (rStr1 == u"zh-Hans-CN" ) return rStr2 == u"zh-CN"; + if (rStr1 == u"zh-Hant-TW" ) return rStr2 == u"zh-TW"; return rStr1 == rStr2; } diff --git a/i18nlangtag/source/isolang/isolang.cxx b/i18nlangtag/source/isolang/isolang.cxx index d20014e67f4b..4287fe247cb4 100644 --- a/i18nlangtag/source/isolang/isolang.cxx +++ b/i18nlangtag/source/isolang/isolang.cxx @@ -754,6 +754,8 @@ IsoLanguageScriptCountryEntry const aImplIsoLangScriptEntries[] = { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA, "bs-Latn", "BA", kSAME }, // MS, though Latn is suppress-script { LANGUAGE_BOSNIAN_LATIN_LSO, "bs-Latn", "" , LANGUAGE_BOSNIAN_LSO }, // MS, though Latn is suppress-script { LANGUAGE_CHINESE_TRADITIONAL_LSO, "zh-Hant", "" , k0 }, + { LANGUAGE_CHINESE_SIMPLIFIED, "zh-Hans", "CN", kSAME }, // canonical, but 
prefer legacy zh-CN + { LANGUAGE_CHINESE_TRADITIONAL, "zh-Hant", "TW", kSAME }, // canonical, but prefer legacy zh-TW { LANGUAGE_USER_MANINKAKAN_EASTERN_LATIN, "emk-Latn", "GN", k0 }, { LANGUAGE_USER_CREE_PLAINS_LATIN, "crk-Latn", "CA", k0 }, { LANGUAGE_USER_CREE_PLAINS_SYLLABICS, "crk-Cans", "CA", k0 }, diff --git a/i18nlangtag/source/isolang/mslangid.cxx b/i18nlangtag/source/isolang/mslangid.cxx index 34c55b66e261..419e28445774 100644 --- a/i18nlangtag/source/isolang/mslangid.cxx +++ b/i18nlangtag/source/isolang/mslangid.cxx @@ -191,14 +191,14 @@ bool MsLangId::usesHyphenation(LanguageType nLang) // static css::lang::Locale MsLangId::Conversion::convertLanguageToLocale( - LanguageType nLang ) + LanguageType nLang, bool bIgnoreOverride ) { css::lang::Locale aLocale; // Still resolve LANGUAGE_DONTKNOW if resolving is not requested, // but not LANGUAGE_SYSTEM or others. LanguageType nOrigLang = nLang; nLang = MsLangId::getRealLanguage(nLang); - convertLanguageToLocaleImpl( nLang, aLocale, true ); + convertLanguageToLocaleImpl( nLang, aLocale, bIgnoreOverride ); if (aLocale.Language.isEmpty() && simplifySystemLanguages(nOrigLang) == LANGUAGE_SYSTEM) { // None found but resolve requested, last resort is "en-US". 
@@ -228,7 +228,7 @@ css::lang::Locale MsLangId::getFallbackLocale( { // empty language => LANGUAGE_SYSTEM if (rLocale.Language.isEmpty()) - return Conversion::lookupFallbackLocale( Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM )); + return Conversion::lookupFallbackLocale( Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, false)); else return Conversion::lookupFallbackLocale( rLocale); } diff --git a/i18nlangtag/source/languagetag/languagetag.cxx b/i18nlangtag/source/languagetag/languagetag.cxx index 239215a187c1..7071f2dcb116 100644 --- a/i18nlangtag/source/languagetag/languagetag.cxx +++ b/i18nlangtag/source/languagetag/languagetag.cxx @@ -961,7 +961,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const // May have involved canonicalize(), so compare with // pImpl->maBcp47 instead of maBcp47! aBcp47 = LanguageTagImpl::convertToBcp47( - MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID )); + MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true)); bInsert = (aBcp47 == pImpl->maBcp47); } } @@ -1352,7 +1352,7 @@ void LanguageTagImpl::convertLocaleToBcp47() // locale via LanguageTag::convertToBcp47(LanguageType) and // LanguageTag::convertToLocale(LanguageType) would instantiate another // LanguageTag. - maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM ); + maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, false); } if (maLocale.Language.isEmpty()) { @@ -1496,7 +1496,7 @@ void LanguageTagImpl::convertLangToLocale() mbInitializedLangID = true; } // Resolve system here! The original is remembered as mbSystemLocale. 
- maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID ); + maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, false); mbInitializedLocale = true; } diff --git a/include/i18nlangtag/mslangid.hxx b/include/i18nlangtag/mslangid.hxx index 1d2f5cc8c651..f59e2d683def 100644 --- a/include/i18nlangtag/mslangid.hxx +++ b/include/i18nlangtag/mslangid.hxx @@ -292,11 +292,12 @@ public: /** Convert a LanguageType to a Locale. */ I18NLANGTAG_DLLPRIVATE static css::lang::Locale convertLanguageToLocale( - LanguageType nLang ); + LanguageType nLang, bool bIgnoreOverride ); /** Used by convertLanguageToLocale(LanguageType,bool) and getLocale(IsoLanguageCountryEntry*) and - getLocale(IsoLanguageScriptCountryEntry) + getLocale(IsoLanguageScriptCountryEntry*) and + getLocale(Bcp47CountryEntry*) @param bIgnoreOverride If bIgnoreOverride==true, a matching entry is used even if