commit ea3cdb15529d013bcac231c5b1f25628f5067b42
Author: Jean-Marc Lasgouttes <[email protected]>
Date:   Wed Nov 2 17:42:56 2016 +0100

    QString only uses UTF-16, not UCS-4
    
    This means that, when a string contains high-plane Unicode characters,
    the length of a docstring and the corresponding QString will be
    different: Qt will encode these characters using several 16-bit
    characters.
    
    We additionally have to take into account QTBUG-25536, which implies
    that sometimes qstring_to_ucs4(toqstr(s)) != s. It is not clear whether
    this bug can be a problem in other places.
    
    Fixes bug #10443.
    (cherry picked from commit 5d85a42bf0a182585b800a80817a6e1208ef7ec3)
---
 src/frontends/qt4/GuiFontMetrics.cpp |   65 +++++++++++++++++++++++++++++-----
 status.22x                           |    3 ++
 2 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/src/frontends/qt4/GuiFontMetrics.cpp 
b/src/frontends/qt4/GuiFontMetrics.cpp
index 74ed392..1d472b5 100644
--- a/src/frontends/qt4/GuiFontMetrics.cpp
+++ b/src/frontends/qt4/GuiFontMetrics.cpp
@@ -206,7 +206,12 @@ int GuiFontMetrics::pos2x(docstring const & s, int const 
pos, bool const rtl,
                           double const wordspacing) const
 {
        QTextLayout const & tl = getTextLayout(s, font_, rtl, wordspacing);
-       return static_cast<int>(tl.lineForTextPosition(pos).cursorToX(pos));
+       /* Since QString is UTF-16 and docstring is UCS-4, the offsets may
+        * not be the same when there are high-plane Unicode characters
+        * (bug #10443).
+        */
+       int const qpos = toqstr(s.substr(0, pos)).length();
+       return static_cast<int>(tl.lineForTextPosition(qpos).cursorToX(qpos));
 }
 
 
@@ -214,10 +219,30 @@ int GuiFontMetrics::x2pos(docstring const & s, int & x, 
bool const rtl,
                           double const wordspacing) const
 {
        QTextLayout const & tl = getTextLayout(s, font_, rtl, wordspacing);
-       int pos = tl.lineForTextPosition(0).xToCursor(x);
+       int const qpos = tl.lineForTextPosition(0).xToCursor(x);
        // correct x value to the actual cursor position.
-       x = static_cast<int>(tl.lineForTextPosition(0).cursorToX(pos));
+       x = static_cast<int>(tl.lineForTextPosition(0).cursorToX(qpos));
+       /* Since QString is UTF-16 and docstring is UCS-4, the offsets may
+        * not be the same when there are high-plane Unicode characters
+        * (bug #10443).
+        */
+#if QT_VERSION < 0x040801 || QT_VERSION >= 0x050100
+       return qstring_to_ucs4(tl.text().left(qpos)).length();
+#else
+       /* Due to QTBUG-25536 in 4.8.1 <= Qt < 5.1.0, the string returned
+        * by QString::toUcs4 (used by qstring_to_ucs4) may have wrong
+        * length. We work around the problem by trying all docstring
+        * positions until the right one is found. This is slow only if
+        * there are many high-plane Unicode characters. It might be
+        * worthwhile to implement a binary search if this shows up
+        * under a profiler.
+        */
+       int pos = min(qpos, static_cast<int>(s.length()));
+       while (pos >= 0 && toqstr(s.substr(0, pos)).length() != qpos)
+               --pos;
+       LASSERT(pos > 0 || qpos == 0, /**/);
        return pos;
+#endif
 }
 
 
@@ -235,17 +260,17 @@ bool GuiFontMetrics::breakAt(docstring & s, int & x, bool 
const rtl, bool const
        */
        // Unicode character ZERO WIDTH NO-BREAK SPACE
        QChar const zerow_nbsp(0xfeff);
-       QString str = zerow_nbsp + toqstr(s) + zerow_nbsp;
+       QString qs = zerow_nbsp + toqstr(s) + zerow_nbsp;
 #if 1
        /* Use unicode override characters to enforce drawing direction
         * Source: http://www.iamcal.com/understanding-bidirectional-text/
         */
        if (rtl)
                // Right-to-left override: forces to draw text right-to-left
-               str = QChar(0x202E) + str;
+               qs = QChar(0x202E) + qs;
        else
                // Left-to-right override: forces to draw text left-to-right
-               str =  QChar(0x202D) + str;
+               qs =  QChar(0x202D) + qs;
        int const offset = 2;
 #else
        // Alternative version that breaks with Qt5 and arabic text (#10436)
@@ -254,7 +279,7 @@ bool GuiFontMetrics::breakAt(docstring & s, int & x, bool 
const rtl, bool const
        int const offset = 1;
 #endif
 
-       tl.setText(str);
+       tl.setText(qs);
        tl.setFont(font_);
        QTextOption to;
        to.setWrapMode(force ? QTextOption::WrapAnywhere : 
QTextOption::WordWrap);
@@ -267,8 +292,30 @@ bool GuiFontMetrics::breakAt(docstring & s, int & x, bool 
const rtl, bool const
        if ((force && line.textLength() == offset) || 
int(line.naturalTextWidth()) > x)
                return false;
        x = int(line.naturalTextWidth());
-       // The offset is here to account for the extra leading characters.
-       s = s.substr(0, line.textLength() - offset);
+       /* Since QString is UTF-16 and docstring is UCS-4, the offsets may
+        * not be the same when there are high-plane Unicode characters
+        * (bug #10443).
+        */
+       // The variable `offset' is here to account for the extra leading 
characters.
+       // The ending character zerow_nbsp has to be ignored if the line is 
complete.
+       int const qlen = line.textLength() - offset - (line.textLength() == 
qs.length());
+#if QT_VERSION < 0x040801 || QT_VERSION >= 0x050100
+       s = qstring_to_ucs4(qs.mid(offset, qlen));
+#else
+       /* Due to QTBUG-25536 in 4.8.1 <= Qt < 5.1.0, the string returned
+        * by QString::toUcs4 (used by qstring_to_ucs4) may have wrong
+        * length. We work around the problem by trying all docstring
+        * positions until the right one is found. This is slow only if
+        * there are many high-plane Unicode characters. It might be
+        * worthwhile to implement a binary search if this shows up
+        * under a profiler.
+        */
+       int len = min(qlen, static_cast<int>(s.length()));
+       while (len >= 0 && toqstr(s.substr(0, len)).length() != qlen)
+               --len;
+       LASSERT(len > 0 || qlen == 0, /**/);
+       s = s.substr(0, len);
+#endif
        return true;
 }
 
diff --git a/status.22x b/status.22x
index 49b351d..2719c6a 100644
--- a/status.22x
+++ b/status.22x
@@ -118,6 +118,9 @@ What's new
 
 - Fix selection painting in right-to-left texts (bug 10424).
 
+- Fix cursor movement when the document contains high-plane Unicode
+  characters (bug 10443).
+
 - Allow using colors supported by xcolor inside mathed (bug 10417).
 
 - Change description of "frame of button" color to "button frame" (bug 10135).

Reply via email to