editeng/source/misc/acorrcfg.cxx               |   16 ++++++++--------
 i18nlangtag/qa/cppunit/test_languagetag.cxx    |    2 ++
 i18nlangtag/source/isolang/isolang.cxx         |    2 ++
 i18nlangtag/source/isolang/mslangid.cxx        |    6 +++---
 i18nlangtag/source/languagetag/languagetag.cxx |    6 +++---
 include/i18nlangtag/mslangid.hxx               |    5 +++--
 6 files changed, 21 insertions(+), 16 deletions(-)

New commits:
commit 437abb3abbc506c1e20c6fec8e574abfe3487842
Author:     Eike Rathke <er...@redhat.com>
AuthorDate: Tue Oct 4 11:14:38 2022 +0200
Commit:     Eike Rathke <er...@redhat.com>
CommitDate: Tue Oct 4 19:15:45 2022 +0200

    Check acor_langtag for language-script fallback instead of only language
    
    A tag with script could be added for which we don't have locale
    data, while we do have it for the same language in another script.
    Do not fall back to that.
    
    With that the share/autocorr/acor_zh-{CN,TW}.dat files created
    additional Asian language listbox entries
    "Chinese (Simplified, China) {zh-Hans-CN}"
    "Chinese (Traditional, Taiwan) {zh-Hant-TW}"
    because those are the canonicalized language tags.
    Prefer the known legacy zh-CN and zh-TW tags instead.
    Could also have happened with any document import.
    
    Which again revealed a flaw in the handling of mapping overrides
    where converting from a LanguageType LCID to Locale could yield a
    different mapping than from BCP47 to LCID, which in the case of
    a fallback for locale data led to odd side effects.
    
    Change-Id: I1e2aaa8e9f99b6b3bc2c9a661215cb00bddd33d6
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/140939
    Reviewed-by: Eike Rathke <er...@redhat.com>
    Tested-by: Jenkins

diff --git a/editeng/source/misc/acorrcfg.cxx b/editeng/source/misc/acorrcfg.cxx
index 5806179bc5f0..53a43cff3f22 100644
--- a/editeng/source/misc/acorrcfg.cxx
+++ b/editeng/source/misc/acorrcfg.cxx
@@ -75,20 +75,20 @@ static void scanAutoCorrectDirForLanguageTags( const 
OUString& rURL )
                         if (SvtLanguageTable::HasLanguageType( 
aLanguageTag.getLanguageType()))
                             continue;
 
-                        // Insert language-only tags only if there is no known
-                        // matching fallback locale, otherwise we'd end up with
-                        // unwanted entries where a language autocorrection
-                        // file covers several locales. We do know a few
-                        // art-x-... though so exclude those and any other
-                        // private-use tag (which should not fallback, but
-                        // avoid).
+                        // Insert language(-script)-only tags only if there is
+                        // no known matching fallback locale, otherwise we'd
+                        // end up with unwanted entries where a language
+                        // autocorrection file covers several locales. We do
+                        // know a few art-x-... though so exclude those and any
+                        // other private-use tag (which should not fallback,
+                        // but avoid).
                         if (aLanguageTag.getCountry().isEmpty()
                                 && LanguageTag::isValidBcp47( aCanonicalized, 
nullptr,
                                     LanguageTag::PrivateUse::DISALLOW))
                         {
                             LanguageTag aFallback( aLanguageTag);
                             aFallback.makeFallback();
-                            if (aFallback.getLanguage() == 
aLanguageTag.getLanguage())
+                            if (aFallback.getLanguageAndScript() == 
aLanguageTag.getLanguageAndScript())
                                 continue;
                         }
 
diff --git a/i18nlangtag/qa/cppunit/test_languagetag.cxx 
b/i18nlangtag/qa/cppunit/test_languagetag.cxx
index 4a211012e80d..f7864d6104fd 100644
--- a/i18nlangtag/qa/cppunit/test_languagetag.cxx
+++ b/i18nlangtag/qa/cppunit/test_languagetag.cxx
@@ -753,6 +753,8 @@ bool checkMapping( std::u16string_view rStr1, 
std::u16string_view rStr2 )
     if (rStr1 == u"kw-UK"       ) return rStr2 == u"kw-GB";
     if (rStr1 == u"oc-FR"       ) return rStr2 == u"oc-FR-lengadoc";
     if (rStr1 == u"oc-ES"       ) return rStr2 == u"oc-ES-aranes";
+    if (rStr1 == u"zh-Hans-CN"  ) return rStr2 == u"zh-CN";
+    if (rStr1 == u"zh-Hant-TW"  ) return rStr2 == u"zh-TW";
     return rStr1 == rStr2;
 }
 
diff --git a/i18nlangtag/source/isolang/isolang.cxx 
b/i18nlangtag/source/isolang/isolang.cxx
index d20014e67f4b..4287fe247cb4 100644
--- a/i18nlangtag/source/isolang/isolang.cxx
+++ b/i18nlangtag/source/isolang/isolang.cxx
@@ -754,6 +754,8 @@ IsoLanguageScriptCountryEntry const 
aImplIsoLangScriptEntries[] =
     { LANGUAGE_BOSNIAN_LATIN_BOSNIA_HERZEGOVINA,    "bs-Latn", "BA", kSAME },  
 // MS, though Latn is suppress-script
     { LANGUAGE_BOSNIAN_LATIN_LSO,                   "bs-Latn", ""  , 
LANGUAGE_BOSNIAN_LSO },   // MS, though Latn is suppress-script
     { LANGUAGE_CHINESE_TRADITIONAL_LSO,             "zh-Hant", ""  , k0    },
+    { LANGUAGE_CHINESE_SIMPLIFIED,                  "zh-Hans", "CN", kSAME },  
 // canonical, but prefer legacy zh-CN
+    { LANGUAGE_CHINESE_TRADITIONAL,                 "zh-Hant", "TW", kSAME },  
 // canonical, but prefer legacy zh-TW
     { LANGUAGE_USER_MANINKAKAN_EASTERN_LATIN,      "emk-Latn", "GN", k0    },
     { LANGUAGE_USER_CREE_PLAINS_LATIN,             "crk-Latn", "CA", k0    },
     { LANGUAGE_USER_CREE_PLAINS_SYLLABICS,         "crk-Cans", "CA", k0    },
diff --git a/i18nlangtag/source/isolang/mslangid.cxx 
b/i18nlangtag/source/isolang/mslangid.cxx
index 34c55b66e261..419e28445774 100644
--- a/i18nlangtag/source/isolang/mslangid.cxx
+++ b/i18nlangtag/source/isolang/mslangid.cxx
@@ -191,14 +191,14 @@ bool MsLangId::usesHyphenation(LanguageType nLang)
 
 // static
 css::lang::Locale MsLangId::Conversion::convertLanguageToLocale(
-        LanguageType nLang )
+        LanguageType nLang, bool bIgnoreOverride )
 {
     css::lang::Locale aLocale;
     // Still resolve LANGUAGE_DONTKNOW if resolving is not requested,
     // but not LANGUAGE_SYSTEM or others.
     LanguageType nOrigLang = nLang;
     nLang = MsLangId::getRealLanguage(nLang);
-    convertLanguageToLocaleImpl( nLang, aLocale, true );
+    convertLanguageToLocaleImpl( nLang, aLocale, bIgnoreOverride );
     if (aLocale.Language.isEmpty() && simplifySystemLanguages(nOrigLang) == 
LANGUAGE_SYSTEM)
     {
         // None found but resolve requested, last resort is "en-US".
@@ -228,7 +228,7 @@ css::lang::Locale MsLangId::getFallbackLocale(
 {
     // empty language => LANGUAGE_SYSTEM
     if (rLocale.Language.isEmpty())
-        return Conversion::lookupFallbackLocale( 
Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM ));
+        return Conversion::lookupFallbackLocale( 
Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, false));
     else
         return Conversion::lookupFallbackLocale( rLocale);
 }
diff --git a/i18nlangtag/source/languagetag/languagetag.cxx 
b/i18nlangtag/source/languagetag/languagetag.cxx
index 239215a187c1..7071f2dcb116 100644
--- a/i18nlangtag/source/languagetag/languagetag.cxx
+++ b/i18nlangtag/source/languagetag/languagetag.cxx
@@ -961,7 +961,7 @@ LanguageTag::ImplPtr LanguageTag::registerImpl() const
                         // May have involved canonicalize(), so compare with
                         // pImpl->maBcp47 instead of maBcp47!
                         aBcp47 = LanguageTagImpl::convertToBcp47(
-                                MsLangId::Conversion::convertLanguageToLocale( 
pImpl->mnLangID ));
+                                MsLangId::Conversion::convertLanguageToLocale( 
pImpl->mnLangID, true));
                         bInsert = (aBcp47 == pImpl->maBcp47);
                     }
                 }
@@ -1352,7 +1352,7 @@ void LanguageTagImpl::convertLocaleToBcp47()
         // locale via LanguageTag::convertToBcp47(LanguageType) and
         // LanguageTag::convertToLocale(LanguageType) would instantiate another
         // LanguageTag.
-        maLocale = MsLangId::Conversion::convertLanguageToLocale( 
LANGUAGE_SYSTEM );
+        maLocale = MsLangId::Conversion::convertLanguageToLocale( 
LANGUAGE_SYSTEM, false);
     }
     if (maLocale.Language.isEmpty())
     {
@@ -1496,7 +1496,7 @@ void LanguageTagImpl::convertLangToLocale()
         mbInitializedLangID = true;
     }
     // Resolve system here! The original is remembered as mbSystemLocale.
-    maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID );
+    maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, false);
     mbInitializedLocale = true;
 }
 
diff --git a/include/i18nlangtag/mslangid.hxx b/include/i18nlangtag/mslangid.hxx
index 1d2f5cc8c651..f59e2d683def 100644
--- a/include/i18nlangtag/mslangid.hxx
+++ b/include/i18nlangtag/mslangid.hxx
@@ -292,11 +292,12 @@ public:
 
         /** Convert a LanguageType to a Locale. */
         I18NLANGTAG_DLLPRIVATE static css::lang::Locale 
convertLanguageToLocale(
-                LanguageType nLang );
+                LanguageType nLang, bool bIgnoreOverride );
 
         /** Used by convertLanguageToLocale(LanguageType,bool) and
             getLocale(IsoLanguageCountryEntry*) and
-            getLocale(IsoLanguageScriptCountryEntry)
+            getLocale(IsoLanguageScriptCountryEntry*) and
+            getLocale(Bcp47CountryEntry*)
 
             @param  bIgnoreOverride
                     If bIgnoreOverride==true, a matching entry is used even if

Reply via email to