This is for Adv-Find only. Yuriy, could you please check if this meets your requirements with Cyrillic chars?
Kornel
diff --git a/src/lyxfind.cpp b/src/lyxfind.cpp index 20e1104aa6..e73c6d2a2d 100644 --- a/src/lyxfind.cpp +++ b/src/lyxfind.cpp @@ -791,17 +791,17 @@ bool regex_replace(string const & s, string & t, string const & searchstr, ** \frac{.*}{x} matches \frac{x+\frac{y}{x}}{z} with .* being 'x+\frac{y'. ** ** @param unmatched ** Number of open braces that must remain open at the end for the verification to succeed. **/ -bool braces_match(string::const_iterator const & beg, - string::const_iterator const & end, +bool braces_match(docstring::const_iterator const & beg, + docstring::const_iterator const & end, int unmatched = 0) { int open_pars = 0; - string::const_iterator it = beg; - LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << string(beg, end) << "'"); + docstring::const_iterator it = beg; + LYXERR(Debug::FIND, "Checking " << unmatched << " unmatched braces in '" << docstring(beg, end) << "'"); for (; it != end; ++it) { // Skip escaped braces in the count if (*it == '\\') { ++it; if (it == end) @@ -835,10 +835,13 @@ public: MatchResult(): match_len(0),match2end(0), pos(0) {}; }; /** The class performing a match between a position in the document and the FindAdvOptions. **/ +typedef basic_regex<char_type> docregex; +typedef regex_iterator<docstring::const_iterator> docregex_iterator; + class MatchStringAdv { public: MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & opt); /** Tests if text starting at the supplied position matches with the one provided to the MatchStringAdv @@ -880,13 +883,13 @@ private: **/ string normalize(docstring const & s, bool hack_braces) const; // normalized string to search string par_as_string; // regular expression to use for searching - regex regexp; + docregex regexp; // same as regexp, but prefixed with a ".*?" 
- regex regexp2; + docregex regexp2; // leading format material as string string lead_as_string; // par_as_string after removal of lead_as_string string par_as_string_nolead; // unmatched open braces in the search string/regexp @@ -1318,10 +1321,26 @@ static void buildAccentsMap() accents["negthinspace"] = u8"\uf0003"; // to omit backslashed latex macros accents["medspace"] = u8"\uf0004"; // See https://en.wikipedia.org/wiki/Private_Use_Areas accents["negmedspace"] = u8"\uf0005"; accents["thickspace"] = u8"\uf0006"; accents["negthickspace"] = u8"\uf0007"; + accents["lyx"] = u8"\uf0010"; // Used logos + accents["LyX"] = u8"\uf0010"; + accents["tex"] = u8"\uf0011"; + accents["TeX"] = u8"\uf0011"; + accents["latex"] = u8"\uf0012"; + accents["LaTeX"] = u8"\uf0012"; + accents["latexe"] = u8"\uf0013"; + accents["LaTeXe"] = u8"\uf0013"; + accents["backslash lyx"] = u8"\uf0010"; // Used logos inserted with starting \backslash + accents["backslash LyX"] = u8"\uf0010"; + accents["backslash tex"] = u8"\uf0011"; + accents["backslash TeX"] = u8"\uf0011"; + accents["backslash latex"] = u8"\uf0012"; + accents["backslash LaTeX"] = u8"\uf0012"; + accents["backslash latexe"] = u8"\uf0013"; + accents["backslash LaTeXe"] = u8"\uf0013"; accents["ddot{\\imath}"] = "ï"; buildaccent("ddot", "aAeEhHiIioOtuUwWxXyY", "äÄëËḧḦïÏïöÖẗüÜẅẄẍẌÿŸ"); // umlaut buildaccent("dot|.", "aAbBcCdDeEfFGghHIimMnNoOpPrRsStTwWxXyYzZ", "ȧȦḃḂċĊḋḊėĖḟḞĠġḣḢİİṁṀṅṄȯȮṗṖṙṘṡṠṫṪẇẆẋẊẏẎżŻ"); // dot{i} can only happen if ignoring case, but there is no lowercase of 'İ' @@ -1381,11 +1400,13 @@ static void buildAccentsMap() */ void Intervall::removeAccents() { if (accents.empty()) buildAccentsMap(); - static regex const 
accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}|(i|imath|jmath|cdot|[a-z]+space)(?![a-zA-Z]))"); + static regex const accre("\\\\(([\\S]|grave|breve|ddot|dot|acute|dacute|mathring|check|hat|bar|tilde|subdot|ogonek|" + "cedilla|subring|textsubring|subhat|textsubcircum|subtilde|textsubtilde|dgrave|textdoublegrave|rcap|textroundcap|slashed)\\{[^\\{\\}]+\\}" + "|((i|imath|jmath|cdot|[a-z]+space)|((backslash )?([lL]y[xX]|[tT]e[xX]|[lL]a[tT]e[xX]e?)))(?![a-zA-Z]))"); smatch sub; for (sregex_iterator itacc(par.begin(), par.end(), accre), end; itacc != end; ++itacc) { sub = *itacc; string key = sub.str(1); if (accents.find(key) != accents.end()) { @@ -2089,11 +2110,11 @@ void LatexInfo::buildKeys(bool isPatternString) makeKey("parbox", KeyInfo(KeyInfo::doRemove, 1, true), isPatternString); // like ('tiny{}' or '\tiny ' ... ) makeKey("footnotesize|tiny|scriptsize|small|large|Large|LARGE|huge|Huge", KeyInfo(KeyInfo::isSize, 0, false), isPatternString); // Survives, like known character - makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); + // makeKey("lyx|LyX|latex|LaTeX|latexe|LaTeXe|tex|TeX", KeyInfo(KeyInfo::isChar, 0, false), isPatternString); makeKey("item|listitem", KeyInfo(KeyInfo::isList, 1, false), isPatternString); makeKey("begin|end", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); makeKey("[|]", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); makeKey("$", KeyInfo(KeyInfo::isMath, 1, false), isPatternString); @@ -2863,24 +2884,24 @@ MatchStringAdv::MatchStringAdv(lyx::Buffer & buf, FindAndReplaceOptions const & } regexp_str = "(" + lead_as_regexp + ")" + par_as_string; regexp2_str = "(" + lead_as_regexp + ").*?" 
+ par_as_string; } LYXERR(Debug::FIND, "Setting regexp to : '" << regexp_str << "'"); - regexp = regex(regexp_str); + regexp = docregex(from_utf8(regexp_str)); LYXERR(Debug::FIND, "Setting regexp2 to: '" << regexp2_str << "'"); - regexp2 = regex(regexp2_str); + regexp2 = docregex(from_utf8(regexp2_str)); } } // Count number of characters in string // {]} ==> 1 // \& ==> 1 // --- ==> 1 // \\[a-zA-Z]+ ==> 1 -static int computeSize(string s, int len) +static int computeSize(docstring s, int len) { if (len == 0) return 0; int skip = 1; int count = 0; @@ -2943,32 +2964,26 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be return mres; } LYXERR(Debug::FIND, "Matching against '" << lyx::to_utf8(docstr) << "'"); LYXERR(Debug::FIND, "After normalization: '" << str << "'"); + docstr = from_utf8(str); if (use_regexp) { LYXERR(Debug::FIND, "Searching in regexp mode: at_begin=" << at_begin); - regex const *p_regexp; + docregex const *p_regexp; regex_constants::match_flag_type flags; if (at_begin) { flags = regex_constants::match_continuous; p_regexp = &regexp; } else { flags = regex_constants::match_default; p_regexp = &regexp2; } - sregex_iterator re_it(str.begin(), str.end(), *p_regexp, flags); - if (re_it == sregex_iterator()) + docregex_iterator re_it(docstr.begin(), docstr.end(), *p_regexp, flags); + if (re_it == docregex_iterator()) return mres; - match_results<string::const_iterator> const & m = *re_it; - - if (0) { // Kornel Benko: DO NOT CHECKK - // Check braces on the segment that matched the entire regexp expression, - // plus the last subexpression, if a (.*?) was inserted in the constructor. - if (!braces_match(m[0].first, m[0].second, open_braces)) - return mres; - } + match_results<docstring::const_iterator> const & m = *re_it; // Check braces on segments that matched all (.*?) subexpressions, // except the last "padding" one inserted by lyx. 
for (size_t i = 1; i < m.size() - 1; ++i) if (!braces_match(m[i].first, m[i].second, open_braces)) @@ -2994,21 +3009,21 @@ MatchResult MatchStringAdv::findAux(DocIterator const & cur, int len, bool at_be result = m[m.size() - close_wildcards].first - m[0].first; size_t pos = m.position(size_t(0)); // Ignore last closing characters while (result > 0) { - if (str[pos+result-1] == '}') + if (docstr[pos+result-1] == '}') --result; else break; } if (result > leadingsize) result -= leadingsize; else result = 0; - mres.match_len = computeSize(str.substr(pos+leadingsize,result), result); - mres.match2end = str.size() - pos - leadingsize; + mres.match_len = computeSize(docstr.substr(pos+leadingsize,result), result); + mres.match2end = docstr.size() - pos - leadingsize; mres.pos = pos+leadingsize; return mres; } // else !use_regexp: but all code paths above return
pgp4um6LBeUEu.pgp
Description: Digitale Signatur von OpenPGP
-- lyx-devel mailing list lyx-devel@lists.lyx.org http://lists.lyx.org/mailman/listinfo/lyx-devel