[LyX/master] Added better handling for languages and colors for advanced F&R

Kornel Benko Fri, 05 Oct 2018 11:29:16 -0700

commit b78bdf80a8e1954e1075500d896d95cab10beff1
Author: Kornel Benko <kor...@lyx.org>
Date:   Fri Oct 5 20:26:44 2018 +0200


    Added better handling for languages and colors for advanced F&R
    
    The change is significant only when the search does not ignore format.
    We try to analyze the pattern string first to get needed features
    for the search.
    We also try to analyse the searched string, and if it does not
    contain all expected features (color, language, char style, char decoration),
    it is rejected without attempting a match.
    
    Still some problems though
---
 src/lyxfind.cpp      |  205 +++++++++++++++++++++++++++++++++++++++++++++++---
 src/output_latex.cpp |   13 ++-
 2 files changed, 202 insertions(+), 16 deletions(-)

diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp
index 936ea24..cbb9289 100644
--- a/src/lyxfind.cpp
+++ b/src/lyxfind.cpp
@@ -52,6 +52,7 @@
 #include "support/lstrings.h"
 
 #include "support/regex.h"
+#include <map>
 
 using namespace std;
 using namespace lyx::support;
@@ -799,6 +800,7 @@ static docstring buffer_to_latex(Buffer & buffer)
        runparams.linelen = 80; //lyxrc.plaintext_linelen;
        // No side effect of file copying and image conversion
        runparams.dryrun = true;
+       runparams.for_search = true;
        pit_type const endpit = buffer.paragraphs().size();
        for (pit_type pit = 0; pit != endpit; ++pit) {
                TeXOnePar(buffer, buffer.text(), pit, os, runparams);
@@ -843,15 +845,167 @@ static size_t identifyLeading(string const & s)
        // @TODO Support \item[text]
        // Kornel: Added textsl, textsf, textit, texttt and noun
        // + allow to seach for colored text too
-       while (regex_replace(t, t, REGEX_BOS 
"\\\\(emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave|subsubsection|subsection|section|subparagraph|paragraph|part)\\*?\\{",
 "")
+       while (regex_replace(t, t, REGEX_BOS 
"\\\\(((emph|noun|text(bf|sl|sf|it|tt))|((textcolor|foreignlanguage)\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)\\{",
 "")
               || regex_replace(t, t, REGEX_BOS "\\$", "")
               || regex_replace(t, t, REGEX_BOS "\\\\\\[ ", "")
               || regex_replace(t, t, REGEX_BOS "\\\\item ", "")
               || regex_replace(t, t, REGEX_BOS "\\\\begin\\{[a-zA-Z_]*\\*?\\} 
", ""))
-               LYXERR(Debug::FIND, "  after removing leading $, \\[ , \\emph{, 
\\textbf{, etc.: '" << t << "'");
+              ;
+       LYXERR(Debug::FIND, "  after removing leading $, \\[ , \\emph{, 
\\textbf{, etc.: '" << t << "'");
        return s.find(t);
 }
 
+typedef map<string, bool> Features;
+
+static Features identifyFeatures(string const & s)
+{
+       static regex const feature("\\\\(([a-z]+(\\{([a-z]+)\\}|\\*)?))\\{");
+       static regex const 
valid("^(((emph|noun|text(bf|sl|sf|it|tt)|(textcolor|foreignlanguage)\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)$");
+       smatch sub;
+       bool displ = true;
+       Features info;
+
+       for (sregex_iterator it(s.begin(), s.end(), feature), end; it != end; 
++it) {
+               sub = *it;
+               if (displ) {
+                       if (sub.str(1).compare("regexp") == 0) {
+                               displ = false;
+                               continue;
+                       }
+                       string token = sub.str(1);
+                       smatch sub2;
+                       if (regex_match(token, sub2, valid)) {
+                               info[token] = true;
+                       }
+                       else {
+                               // ignore
+                       }
+               }
+               else {
+                       if (sub.str(1).compare("endregexp") == 0) {
+                               displ = true;
+                               continue;
+                       }
+               }
+       }
+       return(info);
+}
+
+static int findclosing(string p, int start, int end)
+{
+       int skip = 0;
+       int depth = 0;
+       for (int i = start; i < end; i += 1 + skip) {
+               char c;
+               c = p[i];
+               skip = 0;
+               if (c == '\\') skip = 1;
+               else if (c == '{') depth++;
+               else if (c == '}') {
+                       if (depth == 0) return(i);
+                       --depth;
+               }
+       }
+       return(-1);
+}
+
+
+static string correctlanguagesetting(string par, bool from_regex, bool 
withformat)
+{
+       static string langstart = "\\foreignlanguage{";
+       static int llen = langstart.length();
+       static bool removefirstlang = false;
+       static Features regex_f;
+       static int missed = 0;
+       static bool regex_with_format = false;
+
+       int parlen = par.length();
+       string result = par;
+
+       while ((parlen > 0) && (par[parlen-1] == '\n')) {
+               parlen--;
+       }
+       if (from_regex) {
+               missed = 0;
+               if (withformat) {
+                       regex_f = identifyFeatures(par);
+                       for (auto it = regex_f.cbegin(); it != regex_f.cend(); 
++it) {
+                               string a = it->first;
+                               regex_with_format = true;
+                               // LYXERR0("Identified regex format:" << a);
+                       }
+
+               }
+       } else if (regex_with_format) {
+               Features info = identifyFeatures(par);
+               for (auto it = regex_f.cbegin(); it != regex_f.cend(); ++it) {
+                       string a = it->first;
+                       bool b = it->second;
+                       if (b && ! info[a]) {
+                               missed++;
+                               // LYXERR0("Missed(" << missed << ", srclen = " 
<< parlen );
+                               return("");
+                       }
+               }
+       }
+       else {
+               // LYXERR0("No regex formats");
+       }
+       if (par.compare(0, llen, langstart) == 0) {
+               if (from_regex) {
+                       removefirstlang = false;
+               }
+               int i = findclosing(par, llen, par.length());
+               if (removefirstlang) {
+                       if (i < 0)
+                               result = "";
+                       else {
+                               int closepos = findclosing(par, i+2, 
par.length());
+                               if (closepos > 0) {
+                                       result = par.substr(i+2, closepos-i-2) 
+ par.substr(closepos+1, parlen - closepos-1);
+                               }
+                               else {
+                                       result = par.substr(i+2, parlen-i-2);
+                               }
+                       }
+               }
+               else if (i > 0) {
+                       // skip '}{' after the language spec
+                       int closepos = findclosing(par, i+2, par.length());
+                       size_t insertpos = par.find(langstart, i+2);
+                       if (closepos < 0) {
+                               if (insertpos == string::npos) {
+                                       // there are no closing in par, and no 
next lang spec
+                                       result = par.substr(0, parlen) + "}";
+                               }
+                               else {
+                                       // Add '}' at insertpos only, because 
closing is missing
+                                       result = par.substr(0,insertpos) + "}" 
+ par.substr(insertpos, parlen-insertpos);
+                               }
+                       }
+                       else if ((size_t) closepos > insertpos) {
+                               // Add '}' at insertpos and remove from 
closepos if closepos > insertpos
+                               result = par.substr(0,insertpos) + "}" + 
par.substr(insertpos, closepos - insertpos) + par.substr(closepos+1, parlen 
-closepos-1);
+                       }
+               }
+               else {
+                       result = par;
+                       // For i == 0, it is empty language spec
+                       // and for i < 0 it is Error
+               }
+       }
+       else {
+               if (from_regex) {
+                       removefirstlang = true;
+               }
+       }
+       // remove possible \inputencoding entries
+       while (regex_replace(result, result, "\\\\inputencoding\\{[^\\}]*}", 
""))
+               ;
+       // Either not found language spec,or is single and closed spec or empty
+       return(result);
+}
+
 
 // Remove trailing closure of math, macros and environments, so to catch parts 
of them.
 static int identifyClosing(string & t)
@@ -887,6 +1041,8 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, 
FindAndReplaceOptions const &
        close_wildcards = 0;
 
        size_t lead_size = 0;
+       // correct the language settings
+       par_as_string = correctlanguagesetting(par_as_string, true, 
!opt.ignoreformat);
        if (opt.ignoreformat) {
                if (!use_regexp) {
                        // if par_as_string_nolead were emty,
@@ -897,6 +1053,7 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, 
FindAndReplaceOptions const &
                }
        } else {
                lead_size = identifyLeading(par_as_string);
+               LYXERR(Debug::FIND, "Lead_size: " << lead_size);
                lead_as_string = par_as_string.substr(0, lead_size);
                par_as_string_nolead = par_as_string.substr(lead_size, 
par_as_string.size() - lead_size);
        }
@@ -985,6 +1142,7 @@ int MatchStringAdv::findAux(DocIterator const & cur, int 
len, bool at_begin) con
 
        docstring docstr = stringifyFromForSearch(opt, cur, len);
        string str = normalize(docstr, true);
+       if (str.empty()) return(-1);
        LYXERR(Debug::FIND, "Matching against     '" << lyx::to_utf8(docstr) << 
"'");
        LYXERR(Debug::FIND, "After normalization: '" << str << "'");
 
@@ -1108,9 +1266,10 @@ string MatchStringAdv::normalize(docstring const & s, 
bool hack_braces) const
        // Kornel: Added textsl, textsf, textit, texttt and noun
        // + allow to seach for colored text too
        LYXERR(Debug::FIND, "Removing stale empty \\emph{}, \\textbf{}, 
\\*section{} macros from: " << t);
-       while (regex_replace(t, t, 
"\\\\(emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave|subsubsection|subsection|section|subparagraph|paragraph|part)(\\{\\})+",
 ""))
+       while (regex_replace(t, t, 
"\\\\((emph|noun|text(bf|sl|sf|it|tt|color\\{[a-z]+\\})|(u|uu)line|(s|x)out|uwave)|((sub)?(((sub)?section)|paragraph)|part)\\*?)(\\{\\})+",
 ""))
                LYXERR(Debug::FIND, "  further removing stale empty \\emph{}, 
\\textbf{} macros from: " << t);
 
+       while (regex_replace(t, t, 
"\\\\foreignlanguage\\{[a-z]+\\}(\\{(\\\\item )?\\})+", ""));
        // FIXME - check what preceeds the brace
        if (hack_braces) {
                if (opt.ignoreformat)
@@ -1185,6 +1344,7 @@ docstring latexifyFromCursor(DocIterator const & cur, int 
len)
        runparams.linelen = 8000; //lyxrc.plaintext_linelen;
        // No side effect of file copying and image conversion
        runparams.dryrun = true;
+       runparams.for_search = true;
 
        if (cur.inTexted()) {
                // @TODO what about searching beyond/across paragraph breaks ?
@@ -1194,6 +1354,9 @@ docstring latexifyFromCursor(DocIterator const & cur, int 
len)
                TeXOnePar(buf, *cur.innerText(), cur.pit(), os, runparams,
                          string(), cur.pos(), endpos);
                LYXERR(Debug::FIND, "Latexified text: '" << 
lyx::to_utf8(ods.str()) << "'");
+               string s = correctlanguagesetting(lyx::to_utf8(ods.str()), 
false, false);
+               LYXERR(Debug::FIND, "Latexified text: '" << s << "'");
+               return(lyx::from_utf8(s));
        } else if (cur.inMathed()) {
                // Retrieve the math environment type, and add '$' or '$[' or 
others (\begin{equation}) accordingly
                for (int s = cur.depth() - 1; s >= 0; --s) {
@@ -1259,12 +1422,13 @@ int findAdvFinalize(DocIterator & cur, MatchStringAdv 
const & match)
        if (cur.pos() + len > cur.lastpos())
                return 0;
        LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
-       while (cur.pos() + len <= cur.lastpos() && match(cur, len) == 0) {
+       while (cur.pos() + len <= cur.lastpos() && match(cur, len) <= 0) {
                ++len;
                LYXERR(Debug::FIND, "verifying unmatch with len = " << len);
        }
        // Length of matched text (different from len param)
        int old_len = match(cur, len);
+       if (old_len < 0) old_len = 0;
        int new_len;
        // Greedy behaviour while matching regexps
        while ((new_len = match(cur, len + 1)) > old_len) {
@@ -1281,27 +1445,46 @@ int findForwardAdv(DocIterator & cur, MatchStringAdv & 
match)
 {
        if (!cur)
                return 0;
+       static int max_missed = 0;
        while (!theApp()->longOperationCancelled() && cur) {
                LYXERR(Debug::FIND, "findForwardAdv() cur: " << cur);
                int match_len = match(cur, -1, false);
                LYXERR(Debug::FIND, "match_len: " << match_len);
-               if (match_len) {
+               if (match_len > 0) {
+                       int count = 0;
+                       int match_len_zero_count = 0;
                        for (; !theApp()->longOperationCancelled() && cur; 
cur.forwardPos()) {
                                LYXERR(Debug::FIND, "Advancing cur: " << cur);
                                int match_len2 = match(cur);
-                               LYXERR(Debug::FIND, "match_len: " << 
match_len2);
-                               if (match_len2) {
+                               LYXERR(Debug::FIND, "match_len2: " << 
match_len2);
+                               if (match_len2 > 0) {
                                        // Sometimes in finalize we understand 
it wasn't a match
                                        // and we need to continue the outest 
loop
                                        int len = findAdvFinalize(cur, match);
-                                       if (len > 0)
+                                       if (len > 0) {
                                                return len;
+                                       }
+                               }
+                               if (match_len2 >= 0) {
+                                       count = 0;
+                                       if (match_len2 == 0)
+                                               match_len_zero_count++;
+                                       else
+                                               match_len_zero_count = 0;
+                               }
+                               else {
+                                       count++;
+                                       if (count > max_missed) max_missed = 
count;
+                                       if (count > 5) {
+                                               LYXERR(Debug::FIND, 
"match_len2_zero_count: " << match_len_zero_count << ", match_len was " << 
match_len);
+                                               break;
+                                       }
                                }
                        }
                        if (!cur)
                                return 0;
                }
-               if (cur.pit() < cur.lastpit()) {
+               if (match_len >= 0 && cur.pit() < cur.lastpit()) {
                        LYXERR(Debug::FIND, "Advancing par: cur=" << cur);
                        cur.forwardPar();
                } else {
@@ -1393,8 +1576,8 @@ int findBackwardsAdv(DocIterator & cur, MatchStringAdv & 
match)
 docstring stringifyFromForSearch(FindAndReplaceOptions const & opt,
                                 DocIterator const & cur, int len)
 {
-       LASSERT(cur.pos() >= 0 && cur.pos() <= cur.lastpos(),
-               return docstring());
+       if (cur.pos() < 0 || cur.pos() > cur.lastpos())
+               return docstring();
        if (!opt.ignoreformat)
                return latexifyFromCursor(cur, len);
        else
diff --git a/src/output_latex.cpp b/src/output_latex.cpp
index f73990d..96f51df 100644
--- a/src/output_latex.cpp
+++ b/src/output_latex.cpp
@@ -814,10 +814,12 @@ void TeXOnePar(Buffer const & buf,
                            || (priorpar->getDepth() == par.getDepth()
                                    && priorpar->layout() != par.layout()));
        Language const * const prev_language =
-               (priorpar && !priorpar->isPassThru())
-               ? (use_prev_env_language ? state->prev_env_language_
-                                        : priorpar->getParLanguage(bparams))
-               : outer_language;
+               runparams_in.for_search ?
+                       languages.getLanguage("ignore")
+               :(priorpar && !priorpar->isPassThru())
+                       ? (use_prev_env_language ? state->prev_env_language_
+                                               : 
priorpar->getParLanguage(bparams))
+                       : outer_language;
 
        bool const use_polyglossia = runparams.use_polyglossia;
        string const par_lang = use_polyglossia ?
@@ -854,7 +856,8 @@ void TeXOnePar(Buffer const & buf,
                && runparams.local_font != 0
                && outer_language->rightToLeft()
                && !par_language->rightToLeft();
-       bool const localswitch = text.inset().forceLocalFontSwitch()
+       bool const localswitch = runparams_in.for_search
+                       || text.inset().forceLocalFontSwitch()
                        || (using_begin_end && text.inset().forcePlainLayout())
                        || in_polyglossia_rtl_env;
        if (localswitch) {

[LyX/master] Added better handling for languages and colors for advanced F&R

Reply via email to