Patch 8.2.2278
Problem:    Falling back to old regexp engine can break some patterns.
Solution:   Do not fall back once [[:lower:]] or [[:upper:]] is used.
            (Christian Brabandt, closes #7572)
Files:      src/regexp.c, src/regexp_nfa.c, src/testdir/test_regexp_utf8.vim


*** ../vim-8.2.2277/src/regexp.c        2020-09-07 18:53:18.383974577 +0200
--- src/regexp.c        2021-01-02 17:34:22.627683939 +0100
***************
*** 294,299 ****
--- 294,300 ----
  
  static char_u *regparse;      // Input-scan pointer.
  static int    regnpar;        // () count.
+ static int    wants_nfa;      // regex should use NFA engine
  #ifdef FEAT_SYN_HL
  static int    regnzpar;       // \z() count.
  static int    re_has_z;       // \z item detected
***************
*** 381,386 ****
--- 382,390 ----
  static char_u *cstrchr(char_u *, int);
  static int    re_mult_next(char *what);
  static int    reg_iswordc(int);
+ #ifdef FEAT_EVAL
+ static void report_re_switch(char_u *pat);
+ #endif
  
  static regengine_T bt_regengine;
  static regengine_T nfa_regengine;
***************
*** 2662,2668 ****
      if (prog == NULL)
      {
  #ifdef BT_REGEXP_DEBUG_LOG
!       if (regexp_engine != BACKTRACKING_ENGINE)   // debugging log for NFA
        {
            FILE *f;
            f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
--- 2666,2672 ----
      if (prog == NULL)
      {
  #ifdef BT_REGEXP_DEBUG_LOG
!       if (regexp_engine == BACKTRACKING_ENGINE)   // debugging log for BT engine
        {
            FILE *f;
            f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
***************
*** 2686,2691 ****
--- 2690,2698 ----
                                          && called_emsg == called_emsg_before)
        {
            regexp_engine = BACKTRACKING_ENGINE;
+ #ifdef FEAT_EVAL
+           report_re_switch(expr);
+ #endif
            prog = bt_regengine.regcomp(expr, re_flags);
        }
      }
*** ../vim-8.2.2277/src/regexp_nfa.c    2020-12-21 18:23:56.687287184 +0100
--- src/regexp_nfa.c    2021-01-02 17:40:10.658093030 +0100
***************
*** 253,258 ****
--- 253,264 ----
  static int *post_start;  // holds the postfix form of r.e.
  static int *post_end;
  static int *post_ptr;
+ 
+ // Set when the pattern should use the NFA engine.
+ // E.g. [[:upper:]] only allows 8bit characters for BT engine,
+ // while NFA engine handles multibyte characters correctly.
+ static int wants_nfa;
+ 
  static int nstate;    // Number of states in the NFA.
  static int istate;    // Index in the state vector, used in alloc_state()
  
***************
*** 306,311 ****
--- 312,318 ----
        return FAIL;
      post_ptr = post_start;
      post_end = post_start + nstate_max;
+     wants_nfa = FALSE;
      rex.nfa_has_zend = FALSE;
      rex.nfa_has_backref = FALSE;
  
***************
*** 1707,1712 ****
--- 1714,1720 ----
                                    EMIT(NFA_CLASS_GRAPH);
                                    break;
                                case CLASS_LOWER:
+                                   wants_nfa = TRUE;
                                    EMIT(NFA_CLASS_LOWER);
                                    break;
                                case CLASS_PRINT:
***************
*** 1719,1724 ****
--- 1727,1733 ----
                                    EMIT(NFA_CLASS_SPACE);
                                    break;
                                case CLASS_UPPER:
+                                   wants_nfa = TRUE;
                                    EMIT(NFA_CLASS_UPPER);
                                    break;
                                case CLASS_XDIGIT:
***************
*** 2137,2145 ****
  
            // The engine is very inefficient (uses too many states) when the
            // maximum is much larger than the minimum and when the maximum is
!           // large.  Bail out if we can use the other engine.
            if ((nfa_re_flags & RE_AUTO)
!                                  && (maxval > 500 || maxval > minval + 200))
                return FAIL;
  
            // Ignore previous call to nfa_regatom()
--- 2146,2160 ----
  
            // The engine is very inefficient (uses too many states) when the
            // maximum is much larger than the minimum and when the maximum is
!           // large.  However, when maxval is MAX_LIMIT, it is okay, as this
!           // will emit NFA_STAR.
!           // Bail out if we can use the other engine, but only when the
!           // pattern does not need the NFA engine (e.g. [[:upper:]]\{2,\}
!           // does not work with characters > 8 bit with the BT engine)
            if ((nfa_re_flags & RE_AUTO)
!                                  && (maxval > 500 || maxval > minval + 200)
!                                  && (maxval != MAX_LIMIT && minval < 200)
!                                  && !wants_nfa)
                return FAIL;
  
            // Ignore previous call to nfa_regatom()
*** ../vim-8.2.2277/src/testdir/test_regexp_utf8.vim    2020-12-21 14:54:28.844116987 +0100
--- src/testdir/test_regexp_utf8.vim    2021-01-02 17:34:22.627683939 +0100
***************
*** 510,515 ****
--- 510,561 ----
    bwipe!
  endfunc
  
+ " Check that [[:upper:]] matches for automatic engine
+ func Test_match_char_class_upper()
+   new
+   let _engine=&regexpengine
  
+   " Test 1: [[:upper:]]\{2,\}
+   set regexpengine=0
+   call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
+   call cursor(1,1)
+   let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>"
+   exe search_cmd
+   call assert_equal(4, searchcount().total, 'TEST 1')
+   set regexpengine=1
+   exe search_cmd
+   call assert_equal(2, searchcount().total, 'TEST 1')
+   set regexpengine=2
+   exe search_cmd
+   call assert_equal(4, searchcount().total, 'TEST 1')
+ 
+   " Test 2: [[:upper:]].\+
+   let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>"
+   set regexpengine=0
+   exe search_cmd
+   call assert_equal(2, searchcount().total, 'TEST 2')
+   set regexpengine=1
+   exe search_cmd
+   call assert_equal(1, searchcount().total, 'TEST 2')
+   set regexpengine=2
+   exe search_cmd
+   call assert_equal(2, searchcount().total, 'TEST 2')
+ 
+   " Test 3: [[:lower:]]\+
+   let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>"
+   set regexpengine=0
+   exe search_cmd
+   call assert_equal(4, searchcount().total, 'TEST 3 lower')
+   set regexpengine=1
+   exe search_cmd
+   call assert_equal(2, searchcount().total, 'TEST 3 lower')
+   set regexpengine=2
+   exe search_cmd
+   call assert_equal(4, searchcount().total, 'TEST 3 lower')
+ 
+   " clean up
+   let &regexpengine=_engine
+   bwipe!
+ endfunc
  
  " vim: shiftwidth=2 sts=2 expandtab
*** ../vim-8.2.2277/src/version.c       2021-01-02 17:06:12.538765972 +0100
--- src/version.c       2021-01-02 17:35:54.359249438 +0100
***************
*** 752,753 ****
--- 752,755 ----
  {   /* Add new patch number below this line */
+ /**/
+     2278,
  /**/

-- 
How many light bulbs does it take to change a person?

 /// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/vim_dev/202101021644.102GiI1Y1893653%40masaka.moolenaar.net.

Raspunde prin e-mail lui