editeng/source/editeng/impedit2.cxx                  |    3 +--
 sw/qa/core/text/data/scriptinfo-surrogate-pairs.fodt |    1 +
 sw/qa/core/text/text.cxx                             |   13 +++++++++++--
 sw/source/core/text/porlay.cxx                       |    2 +-
 4 files changed, 14 insertions(+), 5 deletions(-)

New commits:
commit 5526c523bc9fda890e15eacd45f280b0827f8ea0
Author:     Khaled Hosny <kha...@libreoffice.org>
AuthorDate: Sat Jul 29 01:01:58 2023 +0300
Commit:     Michael Stahl <michael.st...@allotropia.de>
CommitDate: Mon Jul 31 12:32:17 2023 +0200

    Fix surrogate pairs handling when tweaking script info
    
    Followup to:
    
    commit 3af30bafbedb8eb481024efb35cb7876c63d26dc
    Author: Khaled Hosny <kha...@libreoffice.org>
    Date:   Thu Jul 27 19:03:28 2023 +0300
    
        sw: Handle surrogate pairs when tweaking script info
    
    and:
    
    commit d6efe8c302b81886706e18640148c51cf7883bbf
    Author: Khaled Hosny <kha...@libreoffice.org>
    Date:   Thu Jul 27 20:39:22 2023 +0300
    
        tdf#112594: Group NNBSP with the Mongolian characters after it
    
    Change-Id: Ie273c457e4f3ed31a3372bc8eb0eb0055c1b97b1
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/155053
    Tested-by: Jenkins
    Reviewed-by: Michael Stahl <michael.st...@allotropia.de>

diff --git a/editeng/source/editeng/impedit2.cxx b/editeng/source/editeng/impedit2.cxx
index d67c49b072b8..8d4960069cc8 100644
--- a/editeng/source/editeng/impedit2.cxx
+++ b/editeng/source/editeng/impedit2.cxx
@@ -1742,8 +1742,7 @@ void ImpEditEngine::InitScriptTypes( sal_Int32 nPara )
                     (nPrevChar == 0x202F /* NNBSP, tdf#112594 */ &&
                      u_getIntPropertyValue(nChar, UCHAR_SCRIPT) == USCRIPT_MONGOLIAN))
                 {
-                    --nPos;
-                    rTypes.back().nEndPos--;
+                    rTypes.back().nEndPos = nPos = nPrevPos;
                     break;
                 }
             }
diff --git a/sw/qa/core/text/data/scriptinfo-surrogate-pairs.fodt b/sw/qa/core/text/data/scriptinfo-surrogate-pairs.fodt
index c14997a80741..5b54fc9e8601 100644
--- a/sw/qa/core/text/data/scriptinfo-surrogate-pairs.fodt
+++ b/sw/qa/core/text/data/scriptinfo-surrogate-pairs.fodt
@@ -287,6 +287,7 @@
     <text:sequence-decl text:display-outline-level="0" text:name="Figure"/>
    </text:sequence-decls>
    <text:p text:style-name="Standard">11◌𐻽</text:p>
+   <text:p text:style-name="Standard">11𝐀َ</text:p>
   </office:text>
  </office:body>
 </office:document>
\ No newline at end of file
diff --git a/sw/qa/core/text/text.cxx b/sw/qa/core/text/text.cxx
index 9d9e99f8d99b..123be460df8c 100644
--- a/sw/qa/core/text/text.cxx
+++ b/sw/qa/core/text/text.cxx
@@ -1475,9 +1475,18 @@ CPPUNIT_TEST_FIXTURE(SwCoreTextTest, testScriptinfosurrogatePairs)
     // Without the fix it fails with:
     // - Expected: 11
     // - Actual  : 11◌
-    assertXPath(pXmlDoc, "//SwParaPortion/SwLineLayout/SwLinePortion[1]", "portion", u"11");
-    assertXPath(pXmlDoc, "//SwParaPortion/SwLineLayout/SwLinePortion[2]", "portion",
+    assertXPath(pXmlDoc, "//txt[1]/SwParaPortion/SwLineLayout/SwLinePortion[1]", "portion", u"11");
+    assertXPath(pXmlDoc, "//txt[1]/SwParaPortion/SwLineLayout/SwLinePortion[2]", "portion",
                 u"\u25CC\U00010EFD");
+
+    // Without the fix this would crash because we got a lone surrogate that
+    // can’t be converted to UTF-8, but if it were not for that it might fail
+    // with something like:
+    // - Expected: 11
+    // - Actual  : 11𝐀
+    assertXPath(pXmlDoc, "//txt[2]/SwParaPortion/SwLineLayout/SwLinePortion[1]", "portion", u"11");
+    assertXPath(pXmlDoc, "//txt[2]/SwParaPortion/SwLineLayout/SwLinePortion[2]", "portion",
+                u"\U0001D400\u064E");
 }
 
 CPPUNIT_TEST_FIXTURE(SwCoreTextTest, testTdf112594)
diff --git a/sw/source/core/text/porlay.cxx b/sw/source/core/text/porlay.cxx
index 7d406a86df0c..567026d5b77a 100644
--- a/sw/source/core/text/porlay.cxx
+++ b/sw/source/core/text/porlay.cxx
@@ -1457,7 +1457,7 @@ void SwScriptInfo::InitScriptInfo(const SwTextNode& rNode,
                 (nPrevChar == CHAR_NNBSP &&
                  u_getIntPropertyValue(nChar, UCHAR_SCRIPT) == USCRIPT_MONGOLIAN))
             {
-                --nPos;
+                nPos = nPrevPos;
             }
         }
         m_ScriptChanges.emplace_back(TextFrameIndex(nPos), nScript);

Reply via email to