formula/source/core/api/FormulaCompiler.cxx |   19 +++++-
 sc/inc/compiler.hxx                         |   10 ++-
 sc/source/core/tool/compiler.cxx            |   82 ++++++++++++++++++++--------
 3 files changed, 83 insertions(+), 28 deletions(-)

New commits:
commit af75098d524311416a5f7caf6ae76055cc689ad1
Author:     Eike Rathke <er...@redhat.com>
AuthorDate: Mon Sep 28 21:02:23 2020 +0200
Commit:     Muhammet Kara <muhammet.k...@collabora.com>
CommitDate: Sun Oct 18 22:36:40 2020 +0200

    Resolves: tdf#137091 Use CharClass matching the formula language
    
     This is a combination of 3 commits.
    
    Resolves: tdf#137091 Use CharClass matching the formula language
    
    ... not the current locale. This is specifically important for
    uppercase/lowercase conversions that may yield different results,
    for example for the Turkish i with/without dot.
    
    I2aa57cdcf530d7a0697c4ffbd5dccb86bb526d8e
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103588
    Tested-by: Jenkins
    Reviewed-by: Eike Rathke <er...@redhat.com>
    (cherry picked from commit 3c6177be2705303044e3de262689d593f3d0f282)
    Signed-off-by: Xisco Fauli <xiscofa...@libreoffice.org>
    
    Current system locale's CharClass for user defined names, tdf#137091 follow-up
    
    I5f025a12ca183acb3f80d2a7527677aceb9ffbd5
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103593
    Reviewed-by: Eike Rathke <er...@redhat.com>
    Tested-by: Jenkins
    (cherry picked from commit d41c45a522c5e973d7043d36bc6c82e77735ab9b)
    
    Determine CharClass difference once, tdf#137091 follow-up
    
    As a side note:
    Clang plugin simplifybool for
    !(rLT1.getLanguage() == "en" && rLT2.getLanguage() == "en")
    reported "error: logical negation of logical op containing negation, can be simplified"
    which is nonsense (the message stayed the same while the checks evolved).
    It actually complained about !(a==b && c==d), wanting it rewritten as
    (a!=b || c!=d) whether that makes sense or not.. it may save one
    boolean operation, yes, but..
    
    Ib478d46d7ff926c1c9f65fec059c7a3f31fa7ce3
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103601
    Tested-by: Jenkins
    Reviewed-by: Eike Rathke <er...@redhat.com>
    (cherry picked from commit 1acf517906b7cdc4931dd26319d467dff53ae7d2)
    
     Conflicts:
            sc/source/core/tool/compiler.cxx
    
    Change-Id: I2aa57cdcf530d7a0697c4ffbd5dccb86bb526d8e
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103598
    Tested-by: Jenkins
    Reviewed-by: Xisco Fauli <xiscofa...@libreoffice.org>
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/104486
    Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoff...@gmail.com>
    Reviewed-by: Muhammet Kara <muhammet.k...@collabora.com>

diff --git a/formula/source/core/api/FormulaCompiler.cxx 
b/formula/source/core/api/FormulaCompiler.cxx
index 16e58f9c8e71..5de0b8d76491 100644
--- a/formula/source/core/api/FormulaCompiler.cxx
+++ b/formula/source/core/api/FormulaCompiler.cxx
@@ -31,6 +31,9 @@
 #include <svl/zforlist.hxx>
 #include <unotools/resmgr.hxx>
 #include <unotools/charclass.hxx>
+#include <vcl/svapp.hxx>
+#include <vcl/settings.hxx>
+#include <comphelper/processfactory.hxx>
 #include <com/sun/star/sheet/FormulaOpCodeMapEntry.hpp>
 #include <com/sun/star/sheet/FormulaMapGroup.hpp>
 #include <com/sun/star/sheet/FormulaMapGroupSpecialOffset.hpp>
@@ -143,6 +146,14 @@ void lclPushOpCodeMapEntries( ::std::vector< 
sheet::FormulaOpCodeMapEntry >& rVe
         lclPushOpCodeMapEntry( rVec, pTable, *pnOpCodes );
 }
 
+CharClass* createCharClassIfNonEnglishUI()
+{
+    const LanguageTag& rLanguageTag( 
Application::GetSettings().GetUILanguageTag());
+    if (rLanguageTag.getLanguage() == "en")
+        return nullptr;
+    return new CharClass( ::comphelper::getProcessComponentContext(), 
rLanguageTag);
+}
+
 class OpCodeList
 {
 public:
@@ -166,8 +177,8 @@ OpCodeList::OpCodeList(bool bLocalized, const 
std::pair<const char*, int>* pSymb
     , mpSymbols(pSymbols)
     , mbLocalized(bLocalized)
 {
-    SvtSysLocale aSysLocale;
-    const CharClass* pCharClass = (xMap->isEnglish() ? nullptr : 
aSysLocale.GetCharClassPtr());
+    std::unique_ptr<CharClass> xCharClass( xMap->isEnglish() ? nullptr : 
createCharClassIfNonEnglishUI());
+    const CharClass* pCharClass = xCharClass.get();
     if (meSepType == FormulaCompiler::SeparatorType::RESOURCE_BASE)
     {
         for (sal_uInt16 i = 0; i <= SC_OPCODE_LAST_OPCODE_ID; ++i)
@@ -813,8 +824,8 @@ FormulaCompiler::OpCodeMapPtr 
FormulaCompiler::CreateOpCodeMap(
     NonConstOpCodeMapPtr xMap( new OpCodeMap( SC_OPCODE_LAST_OPCODE_ID + 1, 
false,
                 FormulaGrammar::mergeToGrammar( FormulaGrammar::setEnglishBit(
                         FormulaGrammar::GRAM_EXTERNAL, bEnglish), 
FormulaGrammar::CONV_UNSPECIFIED)));
-    SvtSysLocale aSysLocale;
-    const CharClass* pCharClass = (xMap->isEnglish() ? nullptr : 
aSysLocale.GetCharClassPtr());
+        std::unique_ptr<CharClass> xCharClass( xMap->isEnglish() ? nullptr : 
createCharClassIfNonEnglishUI());
+    const CharClass* pCharClass = xCharClass.get();
     for (auto const& rMapEntry : rMapping)
     {
         OpCode eOp = OpCode(rMapEntry.Token.OpCode);
diff --git a/sc/inc/compiler.hxx b/sc/inc/compiler.hxx
index c19ff834ded9..5de80a6b9fb3 100644
--- a/sc/inc/compiler.hxx
+++ b/sc/inc/compiler.hxx
@@ -251,7 +251,8 @@ public:
 
 private:
 
-    static CharClass            *pCharClassEnglish;                      // 
character classification for en_US locale
+    static const CharClass      *pCharClassEnglish;     // character 
classification for en_US locale
+    static const CharClass      *pCharClassLocalized;   // character 
classification for UI locale
     static const Convention     *pConventions[ 
formula::FormulaGrammar::CONV_LAST ];
 
     static const struct AddInMap
@@ -282,7 +283,8 @@ private:
 
     std::queue<OpCode> maPendingOpCodes; // additional opcodes generated from 
a single symbol
 
-    const CharClass*    pCharClass;         // which character classification 
is used for parseAnyToken
+    const CharClass* pCharClass; // which character classification is used for 
parseAnyToken and upper/lower
+    bool        mbCharClassesDiffer;    // whether pCharClass and current 
system locale's CharClass differ
     sal_uInt16      mnPredetectedReference;     // reference when reading ODF, 
0 (none), 1 (single) or 2 (double)
     sal_Int32   mnRangeOpPosInSymbol;       // if and where a range operator 
is in symbol
     const Convention *pConv;
@@ -319,6 +321,7 @@ private:
 #endif
 
     bool   NextNewToken(bool bInArray);
+    bool ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) 
const;
 
     virtual void SetError(FormulaError nError) override;
     sal_Int32 NextSymbol(bool bInArray);
@@ -349,7 +352,8 @@ private:
      */
     ScRangeData* GetRangeData( const formula::FormulaToken& pToken ) const;
 
-    static void InitCharClassEnglish();
+    static const CharClass* GetCharClassEnglish();
+    static const CharClass* GetCharClassLocalized();
 
 public:
     ScCompiler( sc::CompileFormulaContext& rCxt, const ScAddress& rPos,
diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx
index d84efe3b1003..bb5efae527fd 100644
--- a/sc/source/core/tool/compiler.cxx
+++ b/sc/source/core/tool/compiler.cxx
@@ -22,6 +22,7 @@
 #include <compiler.hxx>
 
 #include <vcl/svapp.hxx>
+#include <vcl/settings.hxx>
 #include <sfx2/app.hxx>
 #include <sfx2/objsh.hxx>
 #include <basic/sbmeth.hxx>
@@ -78,7 +79,8 @@ using namespace formula;
 using namespace ::com::sun::star;
 using ::std::vector;
 
-CharClass*                          ScCompiler::pCharClassEnglish = nullptr;
+const CharClass*                    ScCompiler::pCharClassEnglish = nullptr;
+const CharClass*                    ScCompiler::pCharClassLocalized = nullptr;
 const ScCompiler::Convention*       ScCompiler::pConventions[ ]   = { nullptr, 
nullptr, nullptr, nullptr, nullptr, nullptr };
 
 enum ScanState
@@ -168,12 +170,17 @@ void ScCompiler::DeInit()
         delete pCharClassEnglish;
         pCharClassEnglish = nullptr;
     }
+    if (pCharClassLocalized)
+    {
+        delete pCharClassLocalized;
+        pCharClassLocalized = nullptr;
+    }
 }
 
 bool ScCompiler::IsEnglishSymbol( const OUString& rName )
 {
     // function names are always case-insensitive
-    OUString aUpper = ScGlobal::pCharClass->uppercase(rName);
+    OUString aUpper = GetCharClassEnglish()->uppercase(rName);
 
     // 1. built-in function name
     OpCode eOp = ScCompiler::GetEnglishOpCode( aUpper );
@@ -192,11 +199,27 @@ bool ScCompiler::IsEnglishSymbol( const OUString& rName )
     return !aIntName.isEmpty();       // no valid function name
 }
 
-void ScCompiler::InitCharClassEnglish()
+const CharClass* ScCompiler::GetCharClassEnglish()
 {
-    css::lang::Locale aLocale( "en", "US", "");
-    pCharClassEnglish = new CharClass(
-            ::comphelper::getProcessComponentContext(), LanguageTag( aLocale));
+    if (!pCharClassEnglish)
+    {
+        css::lang::Locale aLocale( "en", "US", "");
+        pCharClassEnglish = new CharClass(
+                ::comphelper::getProcessComponentContext(), LanguageTag( 
aLocale));
+    }
+    return pCharClassEnglish;
+}
+
+const CharClass* ScCompiler::GetCharClassLocalized()
+{
+    if (!pCharClassLocalized)
+    {
+        // Switching UI language requires restart; if not, we would have to
+        // keep track of that.
+        pCharClassLocalized = new CharClass(
+                ::comphelper::getProcessComponentContext(), 
Application::GetSettings().GetUILanguageTag());
+    }
+    return pCharClassLocalized;
 }
 
 void ScCompiler::SetGrammar( const FormulaGrammar::Grammar eGrammar )
@@ -261,13 +284,19 @@ void ScCompiler::SetFormulaLanguage( const 
ScCompiler::OpCodeMapPtr & xMap )
     {
         mxSymbols = xMap;
         if (mxSymbols->isEnglish())
-        {
-            if (!pCharClassEnglish)
-                InitCharClassEnglish();
-            pCharClass = pCharClassEnglish;
-        }
+            pCharClass = GetCharClassEnglish();
         else
-            pCharClass = ScGlobal::pCharClass;
+            pCharClass = GetCharClassLocalized();
+
+        // The difference is needed for an uppercase() call that usually does 
not
+        // result in different strings but for a few languages like Turkish;
+        // though even de-DE and de-CH may differ in ß/SS handling..
+        // At least don't care if both are English.
+        // The current locale is more likely to not be "en" so check first.
+        const LanguageTag& rLT1 = ScGlobal::GetpLocaleData()->getLanguageTag();
+        const LanguageTag& rLT2 = pCharClass->getLanguageTag();
+        mbCharClassesDiffer = (rLT1 != rLT2 && (rLT1.getLanguage() != "en" || 
rLT2.getLanguage() != "en"));
+
         SetGrammarAndRefConvention( mxSymbols->getGrammar(), GetGrammar());
     }
 }
@@ -1789,6 +1818,7 @@ ScCompiler::ScCompiler( sc::CompileFormulaContext& rCxt, 
const ScAddress& rPos,
     mnCurrentSheetTab(-1),
     mnCurrentSheetEndPos(0),
     pCharClass(ScGlobal::pCharClass),
+    mbCharClassesDiffer(false),
     mnPredetectedReference(0),
     mnRangeOpPosInSymbol(-1),
     pConv(GetRefConvention(FormulaGrammar::CONV_OOO)),
@@ -1812,6 +1842,7 @@ ScCompiler::ScCompiler( ScDocument* pDocument, const 
ScAddress& rPos, ScTokenArr
         mnCurrentSheetEndPos(0),
         nSrcPos(0),
         pCharClass( ScGlobal::pCharClass ),
+        mbCharClassesDiffer(false),
         mnPredetectedReference(0),
         mnRangeOpPosInSymbol(-1),
         pConv( GetRefConvention( FormulaGrammar::CONV_OOO ) ),
@@ -1834,6 +1865,7 @@ ScCompiler::ScCompiler( sc::CompileFormulaContext& rCxt, 
const ScAddress& rPos,
     mnCurrentSheetTab(-1),
     mnCurrentSheetEndPos(0),
     pCharClass(ScGlobal::pCharClass),
+    mbCharClassesDiffer(false),
     mnPredetectedReference(0),
     mnRangeOpPosInSymbol(-1),
     pConv(GetRefConvention(FormulaGrammar::CONV_OOO)),
@@ -1857,6 +1889,7 @@ ScCompiler::ScCompiler( ScDocument* pDocument, const 
ScAddress& rPos,
         mnCurrentSheetEndPos(0),
         nSrcPos(0),
         pCharClass( ScGlobal::pCharClass ),
+        mbCharClassesDiffer(false),
         mnPredetectedReference(0),
         mnRangeOpPosInSymbol(-1),
         pConv( GetRefConvention( FormulaGrammar::CONV_OOO ) ),
@@ -4157,9 +4190,9 @@ void ScCompiler::AutoCorrectParsedSymbol()
     }
 }
 
-static bool lcl_UpperAsciiOrI18n( OUString& rUpper, const OUString& rOrg, 
FormulaGrammar::Grammar eGrammar )
+bool ScCompiler::ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& 
rOrg ) const
 {
-    if (FormulaGrammar::isODFF( eGrammar ))
+    if (FormulaGrammar::isODFF( meGrammar ))
     {
         // ODFF has a defined set of English function names, avoid i18n
         // overhead.
@@ -4168,7 +4201,8 @@ static bool lcl_UpperAsciiOrI18n( OUString& rUpper, const 
OUString& rOrg, Formul
     }
     else
     {
-        rUpper = ScGlobal::pCharClass->uppercase(rOrg);
+        // One of localized or English.
+        rUpper = pCharClass->uppercase(rOrg);
         return false;
     }
 }
@@ -4262,7 +4296,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
     else
     {
         OUString aTmpStr( cSymbol[0] );
-        bMayBeFuncName = ScGlobal::pCharClass->isLetter( aTmpStr, 0 );
+        bMayBeFuncName = pCharClass->isLetter( aTmpStr, 0 );
         bAsciiNonAlnum = false;
     }
 
@@ -4313,7 +4347,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
 
         if (bAsciiNonAlnum)
         {
-            bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar);
+            bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg);
             if (cSymbol[0] == '#')
             {
                 // Check for TableRef item specifiers first.
@@ -4339,7 +4373,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
         if (bMayBeFuncName)
         {
             if (aUpper.isEmpty())
-                bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar);
+                bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg);
             if (IsOpCode( aUpper, bInArray ))
                 return true;
         }
@@ -4363,7 +4397,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
         }
 
         if (aUpper.isEmpty())
-            bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar);
+            bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg);
 
         // IsBoolean() before IsValue() to catch inline bools without the 
kludge
         //    for inline arrays.
@@ -4374,8 +4408,14 @@ bool ScCompiler::NextNewToken( bool bInArray )
             return true;
 
         // User defined names and such do need i18n upper also in ODF.
-        if (bAsciiUpper)
+        if (bAsciiUpper || mbCharClassesDiffer)
+        {
+            // Use current system locale here because user defined symbols are
+            // more likely in that localized language than in the formula
+            // language. This in corner cases needs to continue to work for
+            // existing documents and environments.
             aUpper = ScGlobal::pCharClass->uppercase( aOrg );
+        }
 
         if (IsNamedRange( aUpper ))
             return true;
@@ -4433,7 +4473,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
     // Provide single token information and continue. Do not set an error, that
     // would prematurely end compilation. Simple unknown names are handled by
     // the interpreter.
-    aUpper = ScGlobal::pCharClass->lowercase( aUpper );
+    aUpper = pCharClass->lowercase( aUpper );
     svl::SharedString aSS = pDoc->GetSharedStringPool().intern(aUpper);
     maRawToken.SetString(aSS.getData(), aSS.getDataIgnoreCase());
     maRawToken.NewOpCode( ocBad );
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits

Reply via email to