commit ea1a5cb80eaef71bc1cb7021bc3dbf2ef7a793b7
Author: Jean-Marc Lasgouttes <lasgout...@lyx.org>
Date:   Thu Apr 10 16:37:21 2014 +0200

    Draw right-to-left text string-wise using Qt
    
    We rely on Qt built-in unicode support for handling Arabic and Hebrew
    compose characters. This allows to avoid to use our homegrown
    machinery.
    
    This  should provide a nice speedup at a low cost and
    will eventually allow us to get rid of:
     * most of our Arabic/Hebrew machinery in Encodings.cpp,
     * Paragraph::transformChar,
     * and probably more.

diff --git a/00README_STR_METRICS_BRANCH b/00README_STR_METRICS_BRANCH
index 82026e8..359bb03 100644
--- a/00README_STR_METRICS_BRANCH
+++ b/00README_STR_METRICS_BRANCH
@@ -11,7 +11,7 @@ what we have with force_paint_single_char. Moreover there has 
been
 some code factorization in TextMetrics, where the same row-breaking
 algorithm was basically implemented 3 times.
 
-Currently everything is supposed to work for LTR text, and RTL text
+Currently everything is supposed to work for LTR text, and RtL text
 should work too except possibly metrics with Arabic and Hebrew fonts.
 
 When KEEP_OLD_METRICS_CODE is defined in TextMetrics.cpp, the new code
@@ -40,12 +40,13 @@ What is done:
   lyxrc.force_paint_single_char is false. In this case, remove also
   useless workarounds which disable kerning and ligatures.
 
+* when lyxrc.force_paint_single_char is false, draw also RtL text
+  string-wise. This both speed-up drawing and prepare for code
+  removal, since we now rely on Qt to do things we use to do by
+  ourselves (see isArabic* and isHebrew* code in Encodings.cpp).
 
-Next steps:
 
-* check what happens with Arabic and/or Hebrew text. There may be some
-  problems related to compose characters. Investigate whether RtL
-  strings could be drawn on a string-wise basis.
+Next steps:
 
 * investigate whether strings could be cut at separators in RowPainter
   only in justified text. This would speed-up painting in other cases
@@ -76,18 +77,18 @@ Steps for later (aka out of the scope of this branch):
 
 Known bugs:
 
-* there are still difference in what breaks words. In particular,
-  RowPainter breaks strings at: selection end, spell-checking
-  extremities. This seems to be harmless.
-
-* when clicking in the right margin, getPosNearX does not return
-  the same value as before. I am not sure whether this is important.
-
 * When selecting text, the display seems to move around. This is
   because partly selected words are drawn in two parts, and in case
   like "ef|fort" or "V|AN", there are some ligature or kerning effects
   that change the display. I am not sure yet how to fix that.
 
+* there are other differences in what breaks words. In particular,
+  RowPainter breaks strings at spell-checking extremities. This seems
+  to be harmless.
+
+* when clicking in the right margin, getPosNearX does not return
+  the same value as before. I am not sure whether this is important.
+
 
 Other differences in behavior (aka bug fixes):
 
diff --git a/src/Encoding.cpp b/src/Encoding.cpp
index a6f6a60..7c5dc87 100644
--- a/src/Encoding.cpp
+++ b/src/Encoding.cpp
@@ -711,6 +711,12 @@ docstring Encodings::fromLaTeXCommand(docstring const & 
cmd, int cmdtype,
 }
 
 
+bool Encodings::isHebrewChar(char_type c)
+{
+       return c >= 0x0590 && c <= 0x05ff;
+}
+
+
 bool Encodings::isHebrewComposeChar(char_type c)
 {
        return c <= 0x05c2 && c >= 0x05b0 && c != 0x05be && c != 0x05c0;
diff --git a/src/Encoding.h b/src/Encoding.h
index ed9c27e..63aa93b 100644
--- a/src/Encoding.h
+++ b/src/Encoding.h
@@ -269,6 +269,8 @@ public:
        ///
        static bool isHebrewComposeChar(char_type c);
        ///
+       static bool isHebrewChar(char_type c);
+       ///
        static bool isArabicComposeChar(char_type c);
        ///
        static bool isArabicSpecialChar(char_type c);
diff --git a/src/rowpainter.cpp b/src/rowpainter.cpp
index 5b69701..ea0f946 100644
--- a/src/rowpainter.cpp
+++ b/src/rowpainter.cpp
@@ -161,72 +161,7 @@ void RowPainter::paintInset(Inset const * inset, pos_type 
const pos)
 }
 
 
-void RowPainter::paintHebrewComposeChar(pos_type & vpos, FontInfo const & font)
-{
-       pos_type pos = bidi_.vis2log(vpos);
-
-       docstring str;
-
-       // first char
-       char_type c = par_.getChar(pos);
-       str += c;
-       ++vpos;
-
-       int const width = theFontMetrics(font).width(c);
-       int dx = 0;
-
-       for (pos_type i = pos - 1; i >= 0; --i) {
-               c = par_.getChar(i);
-               if (!Encodings::isHebrewComposeChar(c)) {
-                       if (isPrintableNonspace(c)) {
-                               int const width2 = pm_.singleWidth(i,
-                                       text_metrics_.displayFont(pit_, i));
-                               dx = (c == 0x05e8 || // resh
-                                     c == 0x05d3)   // dalet
-                                       ? width2 - width
-                                       : (width2 - width) / 2;
-                       }
-                       break;
-               }
-       }
-
-       // Draw nikud
-       pi_.pain.text(int(x_) + dx, yo_, str, font);
-}
-
-
-void RowPainter::paintArabicComposeChar(pos_type & vpos, FontInfo const & font)
-{
-       pos_type pos = bidi_.vis2log(vpos);
-       docstring str;
-
-       // first char
-       char_type c = par_.getChar(pos);
-       c = par_.transformChar(c, pos);
-       str += c;
-       ++vpos;
-
-       int const width = theFontMetrics(font).width(c);
-       int dx = 0;
-
-       for (pos_type i = pos - 1; i >= 0; --i) {
-               c = par_.getChar(i);
-               if (!Encodings::isArabicComposeChar(c)) {
-                       if (isPrintableNonspace(c)) {
-                               int const width2 = pm_.singleWidth(i,
-                                               text_metrics_.displayFont(pit_, 
i));
-                               dx = (width2 - width) / 2;
-                       }
-                       break;
-               }
-       }
-       // Draw nikud
-       pi_.pain.text(int(x_) + dx, yo_, str, font);
-}
-
-
-void RowPainter::paintChars(pos_type & vpos, FontInfo const & font,
-                           bool hebrew, bool arabic)
+void RowPainter::paintChars(pos_type & vpos, Font const & font)
 {
        // This method takes up 70% of time when typing
        pos_type pos = bidi_.vis2log(vpos);
@@ -236,15 +171,21 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo 
const & font,
        str.reserve(100);
        str.push_back(prev_char);
 
+       // special case for arabic
+       string const & lang = font.language()->lang();
+       bool const swap_paren = lang == "arabic_arabtex"
+               || lang == "arabic_arabi"
+               || lang == "farsi";
+
        // FIXME: Why only round brackets and why the difference to
        // Hebrew? See also Paragraph::getUChar
-       if (arabic) {
+       if (swap_paren) {
                char_type c = str[0];
                if (c == '(')
                        c = ')';
                else if (c == ')')
                        c = '(';
-               str[0] = par_.transformChar(c, pos);
+               str[0] = c;
        }
 
        pos_type const end = row_.endpos();
@@ -260,6 +201,12 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo 
const & font,
        bool const spell_state =
                lyxrc.spellcheck_continuously && par_.isMisspelled(pos);
 
+       // are we building a RtL string? 
+       //FIXME: I would like to use the new isRTL() from textutils.h,
+       // but it does not give the same results for some reason I do
+       // not understand.
+       bool const rtl = Encodings::isArabicChar(str[0]) || 
Encodings::isHebrewChar(str[0]);
+
        // collect as much similar chars as we can
        for (++vpos ; vpos < end ; ++vpos) {
                if (lyxrc.force_paint_single_char)
@@ -287,48 +234,27 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo 
const & font,
 
                char_type c = par_.getChar(pos);
 
-               if (c == '\t')
-                       break;
-
-               if (!isPrintableNonspace(c))
-                       break;
-
-               /* Because we do our own bidi, at this point the strings are
-                * already in visual order. However, Qt also applies its own
-                * bidi algorithm to strings that it paints to the screen.
-                * Therefore, if we were to paint Hebrew/Arabic words as a
-                * single string, the letters in the words would get reversed
-                * again. In order to avoid that, we don't collect Hebrew/
-                * Arabic characters, but rather paint them one at a time.
-                * See also 
http://thread.gmane.org/gmane.editors.lyx.devel/79740
-                */
-               if (hebrew)
+               //FIXME: I would like to use the new isRTL() from textutils.h,
+               // but it does not give the same results for some reason I do
+               // not understand.
+               bool const new_rtl = Encodings::isArabicChar(c) || 
Encodings::isHebrewChar(c);
+               if (new_rtl != rtl)
+                       // String direction has changed
                        break;
 
-               /* FIXME: these checks are irrelevant, since 'arabic' and
-                * 'hebrew' alone are already going to trigger a break.
-                * However, this should not be removed completely, because
-                * if an alternative solution is found which allows grouping
-                * of arabic and hebrew characters, then these breaks may have
-                * to be re-applied.
-
-               if (arabic && Encodings::isArabicComposeChar(c))
+               if (c == '\t')
                        break;
 
-               if (hebrew && Encodings::isHebrewComposeChar(c))
+               if (!isPrintableNonspace(c))
                        break;
-               */
 
                // FIXME: Why only round brackets and why the difference to
                // Hebrew? See also Paragraph::getUChar
-               if (arabic) {
+               if (swap_paren) {
                        if (c == '(')
                                c = ')';
                        else if (c == ')')
                                c = '(';
-                       c = par_.transformChar(c, pos);
-                       /* see comment in hebrew, explaining why we break */
-                       break;
                }
 
                str.push_back(c);
@@ -337,15 +263,27 @@ void RowPainter::paintChars(pos_type & vpos, FontInfo 
const & font,
 
        docstring s(&str[0], str.size());
 
+       /* Because we do our own bidi, at this point the strings are
+        * already in visual order. However, Qt also applies its own
+        * bidi algorithm to strings that it paints to the screen.
+        * Therefore, if we were to paint Hebrew/Arabic words as a
+        * single string, the letters in the words would get reversed
+        * again. In order to avoid that, we reverse the string in advance.
+        * See also http://thread.gmane.org/gmane.editors.lyx.devel/79740
+        * for an earlier thread on the subject
+        */
+       if (rtl)
+               reverse(s.begin(), s.end());
+
        if (s[0] == '\t')
                s.replace(0,1,from_ascii("    "));
 
        if (!selection && !change_running.changed()) {
-               x_ += pi_.pain.text(int(x_), yo_, s, font);
+               x_ += pi_.pain.text(int(x_), yo_, s, font.fontInfo());
                return;
        }
 
-       FontInfo copy = font;
+       FontInfo copy = font.fontInfo();
        if (change_running.changed())
                copy.setPaintColor(change_running.color());
        else if (selection)
@@ -394,36 +332,14 @@ void RowPainter::paintMisspelledMark(double orig_x, bool 
changed)
 void RowPainter::paintFromPos(pos_type & vpos, bool changed)
 {
        pos_type const pos = bidi_.vis2log(vpos);
-       Font const orig_font = text_metrics_.displayFont(pit_, pos);
+       Font const font = text_metrics_.displayFont(pit_, pos);
        double const orig_x = x_;
 
-       // usual characters, no insets
-       char_type const c = par_.getChar(pos);
-
-       // special case languages
-       string const & lang = orig_font.language()->lang();
-       bool const hebrew = lang == "hebrew";
-       bool const arabic = lang == "arabic_arabtex" || lang == "arabic_arabi" 
||
-                                               lang == "farsi";
-
-       // spelling correct?
-       bool const misspelled =
-               lyxrc.spellcheck_continuously && par_.isMisspelled(pos);
-
-       // draw as many chars as we can
-       if ((!hebrew && !arabic)
-               || (hebrew && !Encodings::isHebrewComposeChar(c))
-               || (arabic && !Encodings::isArabicComposeChar(c))) {
-               paintChars(vpos, orig_font.fontInfo(), hebrew, arabic);
-       } else if (hebrew) {
-               paintHebrewComposeChar(vpos, orig_font.fontInfo());
-       } else if (arabic) {
-               paintArabicComposeChar(vpos, orig_font.fontInfo());
-       }
-
-       paintForeignMark(orig_x, orig_font.language());
+       paintChars(vpos, font);
+       paintForeignMark(orig_x, font.language());
 
-       if (lyxrc.spellcheck_continuously && misspelled) {
+       // Paint the spelling mark if needed.
+       if (lyxrc.spellcheck_continuously && par_.isMisspelled(pos)) {
                // check for cursor position
                // don't draw misspelled marker for words at cursor position
                // we don't want to disturb the process of text editing
diff --git a/src/rowpainter.h b/src/rowpainter.h
index 644cb37..ae1a590 100644
--- a/src/rowpainter.h
+++ b/src/rowpainter.h
@@ -61,10 +61,7 @@ private:
        void paintSeparator(double orig_x, double width, FontInfo const & font);
        void paintForeignMark(double orig_x, Language const * lang, int desc = 
0);
        void paintMisspelledMark(double orig_x, bool changed);
-       void paintHebrewComposeChar(pos_type & vpos, FontInfo const & font);
-       void paintArabicComposeChar(pos_type & vpos, FontInfo const & font);
-       void paintChars(pos_type & vpos, FontInfo const & font,
-                       bool hebrew, bool arabic);
+       void paintChars(pos_type & vpos, Font const & font);
        int paintAppendixStart(int y);
        void paintFromPos(pos_type & vpos, bool changed);
        void paintInset(Inset const * inset, pos_type const pos);
diff --git a/src/support/lstrings.cpp b/src/support/lstrings.cpp
index 8508e4e..f06b2eb 100644
--- a/src/support/lstrings.cpp
+++ b/src/support/lstrings.cpp
@@ -161,6 +161,29 @@ bool isNumber(char_type c)
 }
 
 
+bool isRTL(char_type c)
+{
+       if (!is_utf16(c))
+               // assume that no non-utf16 character is right-to-left
+               // c outside the UCS4 range is catched as well
+               return false;
+       QChar::Direction direction = ucs4_to_qchar(c).direction();
+       /**
+        * See for example:
+        *  http://en.wikipedia.org/wiki/Template:Bidi_Class_%28Unicode%29.
+        * Here we only handle the easy cases:
+        *  * R:  Hebrew alphabet and related punctuation
+        *  * AL: Arabic, Thaana and Syriac alphabets, and most
+        *        punctuation specific to those scripts
+        *
+        * FIXME: testing show that this does not work (see
+        * RowPainter::paintChars), but my knowledge of unicode is too
+        * poor to understand why.
+        */
+       return direction == QChar::DirR || direction ==QChar::DirAL;
+}
+
+
 bool isDigitASCII(char_type c)
 {
        return '0' <= c && c <= '9';
diff --git a/src/support/textutils.h b/src/support/textutils.h
index 78e34cb..4cb304a 100644
--- a/src/support/textutils.h
+++ b/src/support/textutils.h
@@ -44,6 +44,9 @@ bool isSpace(char_type c);
 /// return true if a unicode char is a numeral.
 bool isNumber(char_type c);
 
+/// return true is a unicode char uses a right-to-left direction for layout
+bool isRTL(char_type c);
+
 /// return whether \p c is a digit in the ASCII range
 bool isDigitASCII(char_type c);
 

Reply via email to