commit ugrep for openSUSE:Factory

Source-Sync Fri, 22 Apr 2022 12:55:51 -0700

Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package ugrep for openSUSE:Factory checked 
in at 2022-04-22 21:54:29
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/ugrep (Old)
 and      /work/SRC/openSUSE:Factory/.ugrep.new.1538 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "ugrep"

Fri Apr 22 21:54:29 2022 rev:24 rq:971906 version:3.7.9

Changes:
--------
--- /work/SRC/openSUSE:Factory/ugrep/ugrep.changes      2022-04-05 
19:55:58.185888186 +0200
+++ /work/SRC/openSUSE:Factory/.ugrep.new.1538/ugrep.changes    2022-04-22 
21:55:38.966901384 +0200
@@ -1,0 +2,7 @@
+Fri Apr  8 09:49:39 UTC 2022 - Andreas Stieger <andreas.stie...@gmx.de>
+
+- update to 3.7.9:
+  * Additional --stats results with total lines searched and
+    matched, and time elapsed. 
+
+-------------------------------------------------------------------

Old:
----
  ugrep-3.7.7.tar.gz

New:
----
  ugrep-3.7.9.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ ugrep.spec ++++++
--- /var/tmp/diff_new_pack.vxVnCG/_old  2022-04-22 21:55:39.546902045 +0200
+++ /var/tmp/diff_new_pack.vxVnCG/_new  2022-04-22 21:55:39.550902049 +0200
@@ -17,7 +17,7 @@
 
 
 Name:           ugrep
-Version:        3.7.7
+Version:        3.7.9
 Release:        0
 Summary:        Universal grep: a feature-rich grep implementation with focus 
on speed
 License:        BSD-3-Clause

++++++ ugrep-3.7.7.tar.gz -> ugrep-3.7.9.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.7.7/README.md new/ugrep-3.7.9/README.md
--- old/ugrep-3.7.7/README.md   2022-04-01 21:47:50.000000000 +0200
+++ new/ugrep-3.7.9/README.md   2022-04-07 22:52:38.000000000 +0200
@@ -154,6 +154,7 @@
 - [Download and install](#install)
 - [Performance comparisons](#speed)
 - [Using ugrep within Vim](#vim)
+- [Using ugrep within Emacs](#emacs)
 - [Using ugrep to replace GNU/BSD grep](#grep)
   - [Equivalence to GNU/BSD grep](#equivalence)
   - [Short and quick command aliases](#aliases)
@@ -478,7 +479,7 @@
 
 GREP            | T1       | T2       | T3       | T4       | T5       | T6    
   | T7       | T8       | T9       | T10      | T11      | T12      |
 --------------- | -------- | -------- | -------- | -------- | -------- | 
-------- | -------- | -------- | -------- | -------- | -------- | -------- |
-ugrep           | **0.03** | **0.04** | **6.05** | **0.07** | **0.02** | 
**0.98** | **0.97** | **0.87** | **0.26** | **0.10** | **0.19** | **0.02** |
+ugrep           | **0.02** | **0.03** | **6.05** | **0.07** | **0.02** | 
**0.98** | **0.97** | **0.87** | **0.26** | **0.10** | **0.19** | **0.02** |
 hyperscan grep  | 0.09     | 0.10     | **4.35** | 0.11     | 0.04     | 7.78  
   | 3.39     | 1.41     | 1.17     | *n/a*    | *n/a*    | *n/a*    |
 ripgrep         | 0.06     | 0.10     | 7.50     | 0.19     | 0.06     | 2.20  
   | 2.07     | 2.01     | 2.14     | 0.12     | 0.36     | 0.03     |
 silver searcher | 0.10     | 0.11     | *n/a*    | 0.16     | 0.21     | *n/a* 
   | *n/a*    | *n/a*    | *n/a*    | 0.45     | 0.32     | 0.09     |
@@ -540,11 +541,11 @@
         set grepformat=%f:%l:%c:%m,%f+%l+%c+%m,%-G%f\\\|%l\\\|%c\\\|%m
     endif
 
-This specifies case insensitive searches with the Vim `:grep` command.  For
-case sensitive searches, remove `\ -j` from `grepprg`.  Multiple matches on the
-same line are listed in the quickfix window separately.  If this is not
-desired, remove `\ -u` from `grepprg`.  With this change, only the first match
-on a line is shown.  Option `--ignore-files` skips files specified in
+This specifies `-j` [case insensitive searches](#case) with the Vim `:grep`
+command.  For case sensitive searches, remove `\ -j` from `grepprg`.  Multiple
+matches on the same line are listed in the quickfix window separately.  If this
+is not desired, remove `\ -u` from `grepprg`.  With this change, only the first
+match on a line is shown.  Option `--ignore-files` skips files specified in
 `.gitignore` files, when present.  To limit the depth of recursive searches to
 the current directory only, append `\ -1` to `grepprg`.
 
@@ -615,6 +616,39 @@
 
 🔝 [Back to table of contents](#toc)
 
+<a name="emacs"/>
+
+Using ugrep within Emacs
+------------------------
+
+Thanks to [Manuel 
Uberti](https://github.com/emacs-mirror/emacs/commits?author=manuel-uberti),
+you can now use **ugrep** in Emacs.  To use **ugrep** instead of GNU grep
+within Emacs, add the following line to your `.emacs.d/init.el` file:
+
+    (setq-default xref-search-program 'ugrep)
+
+This means that Emacs commands such as `project-find-regexp` that rely on
+[Xref](https://www.gnu.org/software/emacs/manual/html_node/emacs/Xref.html) can
+now leverage the power of **ugrep**.
+
+Furthermore, it is possible to use `ugrep` in the [Emacs grep
+commands](https://www.gnu.org/software/emacs/manual/html_node/emacs/Grep-Searching.html).
+For instance, you can run `lgrep` with `ugrep` by customizing `grep-template`
+to something like the following:
+
+    (setq-default grep-template "ugrep --color=always -0Iinr -e <R>")
+
+If you do not have Emacs version 29 (or greater) you can download and build
+Emacs from the [Emacs master branch](https://github.com/emacs-mirror/emacs),
+or enable Xref integration with **ugrep** manually:
+
+    (with-eval-after-load 'xref
+     (push '(ugrep . "xargs -0 ugrep <C> --null -ns -e <R>")
+           xref-search-program-alist)
+     (setq-default xref-search-program 'ugrep))
+
+🔝 [Back to table of contents](#toc)
+
 <a name="grep"/>
 
 Using ugrep to replace GNU/BSD grep
@@ -1656,9 +1690,12 @@
 ### Search non-Unicode files with --encoding
 
     --encoding=ENCODING
-            The input file encoding.
+            The encoding format of the input.  The default ENCODING is binary
+            and UTF-8 which are the same.  Note that option -U specifies binary
+            PATTERN matching (text matching is the default.)
 
-ASCII, UTF-8, UTF-16, and UTF-32 files do not require this option, assuming
+Binary, ASCII and UTF-8 files do not require this option to search them.  Also
+UTF-16 and UTF-32 files do not require this option to search them, assuming
 that UTF-16 and UTF-32 files start with a UTF BOM
 ([byte order mark](https://en.wikipedia.org/wiki/Byte_order_mark)) as usual.
 Other file encodings require option `--encoding=ENCODING`:
@@ -3952,16 +3989,18 @@
                   arguments.
 
            --encoding=ENCODING
-                  The encoding  format  of  the  input,  where  ENCODING  can  
be:
-                  `binary',  `ASCII',  `UTF-8',  `UTF-16', `UTF-16BE', 
`UTF-16LE',
-                  `UTF-32',  `UTF-32BE',   `UTF-32LE',   `LATIN1',   
`ISO-8859-1',
-                  `ISO-8859-2',    `ISO-8859-3',    `ISO-8859-4',    
`ISO-8859-5',
-                  `ISO-8859-6',    `ISO-8859-7',    `ISO-8859-8',    
`ISO-8859-9',
-                  `ISO-8859-10',   `ISO-8859-11',   `ISO-8859-13',  
`ISO-8859-14',
-                  `ISO-8859-15',  `ISO-8859-16',  `MAC',   `MACROMAN',   
`EBCDIC',
-                  `CP437',   `CP850',   `CP858',   `CP1250',  `CP1251',  
`CP1252',
-                  `CP1253',  `CP1254',  `CP1255',  `CP1256',  `CP1257',  
`CP1258',
-                  `KOI8-R', `KOI8-U', `KOI8-RU'.
+                  The encoding format of  the  input.   The  default  ENCODING 
 is
+                  binary and UTF-8 which are the same.  Note that option -U 
speci-
+                  fies binary PATTERN matching (text  matching  is  the  
default.)
+                  ENCODING   can   be:   `binary',   `ASCII',  `UTF-8',  
`UTF-16',
+                  `UTF-16BE',  `UTF-16LE',   `UTF-32',   `UTF-32BE',   
`UTF-32LE',
+                  `LATIN1',      `ISO-8859-1',     `ISO-8859-2',     
`ISO-8859-3',
+                  `ISO-8859-4',    `ISO-8859-5',    `ISO-8859-6',    
`ISO-8859-7',
+                  `ISO-8859-8',    `ISO-8859-9',   `ISO-8859-10',   
`ISO-8859-11',
+                  `ISO-8859-13',  `ISO-8859-14',   `ISO-8859-15',   
`ISO-8859-16',
+                  `MAC',   `MACROMAN',   `EBCDIC',   `CP437',   `CP850',  
`CP858',
+                  `CP1250',  `CP1251',  `CP1252',  `CP1253',  `CP1254',  
`CP1255',
+                  `CP1256', `CP1257', `CP1258', `KOI8-R', `KOI8-U', `KOI8-RU'.
 
            --exclude=GLOB
                   Skip files whose name matches GLOB using wildcard matching, 
same
@@ -5099,7 +5138,7 @@
 
 
 
-    ugrep 3.7.7                     April 01, 2022                        
UGREP(1)
+    ugrep 3.7.9                     April 07, 2022                        
UGREP(1)
 
 🔝 [Back to table of contents](#toc)
 
Binary files old/ugrep-3.7.7/bin/win32/ugrep.exe and 
new/ugrep-3.7.9/bin/win32/ugrep.exe differ
Binary files old/ugrep-3.7.7/bin/win64/ugrep.exe and 
new/ugrep-3.7.9/bin/win64/ugrep.exe differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.7.7/include/reflex/absmatcher.h 
new/ugrep-3.7.9/include/reflex/absmatcher.h
--- old/ugrep-3.7.7/include/reflex/absmatcher.h 2022-04-01 21:47:50.000000000 
+0200
+++ new/ugrep-3.7.9/include/reflex/absmatcher.h 2022-04-07 22:52:38.000000000 
+0200
@@ -161,8 +161,12 @@
     friend class Iterator<typename reflex::TypeOp<T>::ConstType>;
     friend class Iterator<typename reflex::TypeOp<T>::NonConstType>;
    public:
+    /// Non-const AbstractMatcher type
+    typedef typename reflex::TypeOp<T>::NonConstType NonConstT;
+    /// Const AbstractMatcher type
+    typedef typename reflex::TypeOp<T>::ConstType ConstT;
     /// Iterator iterator_category trait.
-    typedef std::output_iterator_tag iterator_category;
+    typedef std::input_iterator_tag iterator_category;
     /// Iterator value_type trait.
     typedef T value_type;
     /// Iterator difference_type trait.
@@ -178,7 +182,7 @@
         method_()
     { }
     /// Copy constructor.
-    Iterator(const Iterator<typename reflex::TypeOp<T>::NonConstType>& it)
+    Iterator(const Iterator<NonConstT>& it)
       :
         matcher_(it.matcher_),
         method_(it.method_)
@@ -196,13 +200,13 @@
       return matcher_;
     }
     /// AbstractMatcher::Iterator equality.
-    bool operator==(const Iterator<typename reflex::TypeOp<T>::ConstType>& 
rhs) const
+    bool operator==(const Iterator<ConstT>& rhs) const
       /// @returns true if iterator equals RHS
     {
       return matcher_ == rhs.matcher_;
     }
     /// AbstractMatcher::Iterator inequality.
-    bool operator!=(const Iterator<typename reflex::TypeOp<T>::ConstType>& 
rhs) const
+    bool operator!=(const Iterator<ConstT>& rhs) const
       /// @returns true if iterator does not equal RHS
     {
       return matcher_ != rhs.matcher_;
@@ -225,8 +229,8 @@
     }
     /// Construct an AbstractMatcher::Iterator to scan, search, or split an 
input character sequence.
     Iterator(
-        AbstractMatcher *matcher, ///< iterate over pattern matches with this 
matcher
-        Method           method)  ///< match using method Const::SCAN, 
Const::FIND, or Const::SPLIT
+        NonConstT *matcher, ///< iterate over pattern matches with this matcher
+        Method     method)  ///< match using method Const::SCAN, Const::FIND, 
or Const::SPLIT
       :
         matcher_(matcher),
         method_(method)
@@ -235,8 +239,8 @@
         matcher_ = NULL;
     }
    private:
-    AbstractMatcher *matcher_; ///< the matcher used by this iterator
-    Method           method_;  ///< the method for pattern matching by this 
iterator's matcher
+    NonConstT *matcher_; ///< the matcher used by this iterator
+    Method     method_;  ///< the method for pattern matching by this 
iterator's matcher
   };
  public:
   typedef AbstractMatcher::Iterator<AbstractMatcher>       iterator;       
///< std::input_iterator for scanning, searching, and splitting input character 
sequences
@@ -431,11 +435,15 @@
     while (in.good()) // there is more to get while good(), e.g. via wrap()
     {
       (void)grow();
-      end_ += get(buf_ + end_, max_ - end_);
+      size_t len = get(buf_ + end_, max_ - end_);
+      if (len == 0)
+        break;
+      end_ += len;
     }
     if (end_ == max_)
       (void)grow(1); // make sure we have room for a final \0
-    return in.eof();
+    eof_ = in.eof();
+    return eof_;
   }
 #if defined(WITH_SPAN)
   /// Set event handler functor to invoke when the buffer contents are shifted 
out, e.g. for logging the data searched.
@@ -633,8 +641,7 @@
   /// Set or change the starting line number of the last match.
   inline void lineno(size_t n) ///< new line number
   {
-    if (lpb_ < txt_)
-      (void)lineno(); // update lno_ and bol_ (or cno_) before overriding lno_
+    (void)lineno(); // update lno_ and bol_ (or cno_) before overriding lno_
     lno_ = n;
   }
   /// Updates and returns the starting line number of the match in the input 
character sequence.
@@ -659,7 +666,7 @@
       else
       {
         __m128i vlcn = _mm_set1_epi8('\n');
-        while (s + 15 <= t)
+        while (s + 16 <= t)
         {
           __m128i vlcm = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s));
           __m128i vlceq = _mm_cmpeq_epi8(vlcm, vlcn);
@@ -676,7 +683,7 @@
       else
       {
         __m128i vlcn = _mm_set1_epi8('\n');
-        while (s + 15 <= t)
+        while (s + 16 <= t)
         {
           __m128i vlcm = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s));
           __m128i vlceq = _mm_cmpeq_epi8(vlcm, vlcn);
@@ -687,7 +694,7 @@
       }
 #elif defined(HAVE_SSE2)
       __m128i vlcn = _mm_set1_epi8('\n');
-      while (s + 15 <= t)
+      while (s + 16 <= t)
       {
         __m128i vlcm = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s));
         __m128i vlceq = _mm_cmpeq_epi8(vlcm, vlcn);
@@ -791,6 +798,17 @@
   {
     return lineno() + lines() - 1;
   }
+  /// Set or change the starting column number of the last match.
+  inline void columno(size_t n) ///< new column number
+  {
+    (void)lineno(); // update lno_ and bol_ (or cno_) before overriding cno_
+#if defined(WITH_SPAN)
+    cpb_ = txt_;
+#else
+    lpb_ = txt_;
+#endif
+    cno_ = n;
+  }
   /// Updates and returns the starting column number of the matched text, 
taking tab spacing into account and counting wide characters as one character 
each
   inline size_t columno()
     /// @returns column number
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.7.7/lib/matcher.cpp 
new/ugrep-3.7.9/lib/matcher.cpp
--- old/ugrep-3.7.7/lib/matcher.cpp     2022-04-01 21:47:50.000000000 +0200
+++ new/ugrep-3.7.9/lib/matcher.cpp     2022-04-07 22:52:38.000000000 +0200
@@ -812,8 +812,30 @@
   if (bmd_ == 0)
   {
     // Boyer-Moore preprocessing of the given pattern pat of length len, 
generates bmd_ > 0 and bms_[] shifts.
-    // relative frequency table of English letters, source code, and UTF-8 
bytes
-    static unsigned char freq[256] = 
"\0\0\0\0\0\0\0\0\0\73\4\0\0\4\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\73\70\70\1\1\2\2\70\70\70\2\2\70\70\70\2\3\3\3\3\3\3\3\3\3\3\70\70\70\70\70\70\2\35\14\24\26\37\20\17\30\33\11\12\25\22\32\34\15\7\27\31\36\23\13\21\10\16\6\70\1\70\2\70\1\67\46\56\60\72\52\51\62\65\43\44\57\54\64\66\47\41\61\63\71\55\45\53\42\50\40\70\2\70\2\0\47\47\47\47\47\47\47\47\47\47\47\47\47\47\47\47\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\44\44\44\44\44\44\44\44\44\44\44\44\44\44\44\44\0\0\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\46\56\56\56\56\56\56\56\56\56\56\56\56\46\56\56\73\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+    // updated relative frequency table of English letters (with 
upper/lower-case ratio = 0.0563), punctuation and UTF-8 bytes
+    static unsigned char freq[256] =
+      // x64 binary ugrep.exe frequencies combined with ASCII TAB/LF/CR 
control code frequencies
+      "\377\101\14\22\15\21\10\10\24\73\41\10\11\41\6\51"
+      "\16\4\3\3\3\3\3\3\6\3\3\2\3\4\4\12"
+      // TAB/LF/CR control code frequencies in text
+      // "\0\0\0\0\0\0\0\0\0\73\41\0\0\41\0\0"
+      // "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+      // ASCII frequencies
+      "\377\0\1\1\0\0\16\33\6\6\7\0\27\11\27\14"
+      "\13\14\10\5\4\5\4\4\4\7\12\21\10\14\10\0"
+      "\0\11\2\3\5\16\2\2\7\10\0\1\4\3\7\10"
+      "\2\0\6\7\12\3\1\3\0\2\0\70\1\70\0\1"
+      "\0\237\35\64\133\373\53\47\170\205\3\20\115\64\202\227"
+      "\45\2\162\170\272\64\23\56\3\47\2\3\15\3\0\0"
+      // upper half with UTF-8 multibyte frequencies (synthesized)
+      "\47\47\47\47\47\47\47\47\47\47\47\47\47\47\47\47"
+      "\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45"
+      "\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45\45"
+      "\44\44\44\44\44\44\44\44\44\44\44\44\44\44\44\44"
+      "\0\0\5\5\5\5\5\5\5\5\5\5\5\5\5\5"
+      "\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5\5"
+      "\46\56\56\56\56\56\56\56\56\56\56\56\56\46\56\56"
+      "\73\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
     uint8_t n = static_cast<uint8_t>(len); // okay to cast: actually never 
more than 255
     uint16_t i;
     for (i = 0; i < 256; ++i)
@@ -826,12 +848,15 @@
       bms_[pch] = static_cast<uint8_t>(n - i - 1);
       if (i > 0)
       {
-        if (freq[static_cast<uint8_t>(pre[lcp_])] > freq[pch])
+        unsigned char freqpch = freq[pch];
+        uint8_t lcpch = static_cast<uint8_t>(pre[lcp_]);
+        uint8_t lcsch = static_cast<uint8_t>(pre[lcs_]);
+        if (freq[lcpch] > freqpch)
         {
           lcs_ = lcp_;
           lcp_ = i;
         }
-        else if (freq[static_cast<uint8_t>(pre[lcs_])] > freq[pch])
+        else if (lcpch != pch && freq[lcsch] > freqpch)
         {
           lcs_ = i;
         }
@@ -842,7 +867,7 @@
       if (pre[j - 1] == pre[i])
         break;
     bmd_ = i - j + 1;
-#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) || 
defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2
+#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) || 
defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2 || 
defined(HAVE_NEON)
     size_t score = 0;
     for (i = 0; i < n; ++i)
       score += bms_[static_cast<uint8_t>(pre[i])];
@@ -851,19 +876,19 @@
 #if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2)
     if (!have_HW_SSE2() && !have_HW_AVX2() && !have_HW_AVX512BW())
     {
-      // if scoring is high and freq is high, then use our improved 
Boyer-Moore instead of memchr()
+      // if scoring is high and freq is high, then use our improved 
Boyer-Moore instead
 #if defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2
-      // SSE2 is available, expect fast memchr() to use instead of BM
+      // SSE2 is available, expect fast memchr()
       if (score > 1 && fch > 35 && (score > 3 || fch > 50) && fch + score > 52)
         lcs_ = 0xffff;
 #else
-      // no SSE2 available, expect slow memchr() and use BM unless score or 
frequency are too low
+      // no SSE2 available, expect slow memchr()
       if (fch > 37 || (fch > 8 && score > 0))
         lcs_ = 0xffff;
 #endif
     }
-#elif defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2
-    // SSE2 is available, expect fast memchr() to use instead of BM
+#elif defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2 || 
defined(HAVE_NEON)
+    // SIMD is available, if scoring is high and freq is high, then use our 
improved Boyer-Moore
     if (score > 1 && fch > 35 && (score > 3 || fch > 50) && fch + score > 52)
       lcs_ = 0xffff;
 #endif
@@ -876,7 +901,7 @@
       const char *s = buf_ + loc + lcp_;
       const char *e = buf_ + end_ + lcp_ - len + 1;
 #if defined(COMPILE_AVX512BW)
-      // implements AVX512BW string search scheme based on in 
http://0x80.pl/articles/simd-friendly-karp-rabin.html
+      // implements AVX512BW string search scheme based on 
http://0x80.pl/articles/simd-friendly-karp-rabin.html
       __m512i vlcp = _mm512_set1_epi8(pre[lcp_]);
       __m512i vlcs = _mm512_set1_epi8(pre[lcs_]);
       while (s + 64 <= e)
@@ -909,7 +934,7 @@
         s += 64;
       }
 #elif defined(COMPILE_AVX2)
-      // implements AVX2 string search scheme based on in 
http://0x80.pl/articles/simd-friendly-karp-rabin.html
+      // implements AVX2 string search scheme based on 
http://0x80.pl/articles/simd-friendly-karp-rabin.html
       __m256i vlcp = _mm256_set1_epi8(pre[lcp_]);
       __m256i vlcs = _mm256_set1_epi8(pre[lcs_]);
       while (s + 32 <= e)
@@ -944,7 +969,7 @@
         s += 32;
       }
 #elif defined(HAVE_SSE2)
-      // implements SSE2 string search scheme based on in 
http://0x80.pl/articles/simd-friendly-karp-rabin.html
+      // implements SSE2 string search scheme based on 
http://0x80.pl/articles/simd-friendly-karp-rabin.html
       __m128i vlcp = _mm_set1_epi8(pre[lcp_]);
       __m128i vlcs = _mm_set1_epi8(pre[lcs_]);
       while (s + 16 <= e)
@@ -979,7 +1004,7 @@
         s += 16;
       }
 #elif defined(HAVE_NEON)
-      // implements NEON/AArch64 string search scheme based on in 
http://0x80.pl/articles/simd-friendly-karp-rabin.html but 64 bit optimized
+      // implements NEON/AArch64 string search scheme based on 
http://0x80.pl/articles/simd-friendly-karp-rabin.html but 64 bit optimized
       uint8x16_t vlcp = vdupq_n_u8(pre[lcp_]);
       uint8x16_t vlcs = vdupq_n_u8(pre[lcs_]);
       while (s + 16 <= e)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.7.7/lib/simd_avx2.cpp 
new/ugrep-3.7.9/lib/simd_avx2.cpp
--- old/ugrep-3.7.7/lib/simd_avx2.cpp   2022-04-01 21:47:50.000000000 +0200
+++ new/ugrep-3.7.9/lib/simd_avx2.cpp   2022-04-07 22:52:38.000000000 +0200
@@ -45,7 +45,7 @@
   size_t n = 0;
 #if defined(HAVE_AVX2)
   __m256i vlcn = _mm256_set1_epi8('\n');
-  while (s + 31 <= e)
+  while (s + 32 <= e)
   {
     __m256i vlcm = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(s));
     __m256i vlceq = _mm256_cmpeq_epi8(vlcm, vlcn);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.7.7/lib/simd_avx512bw.cpp 
new/ugrep-3.7.9/lib/simd_avx512bw.cpp
--- old/ugrep-3.7.7/lib/simd_avx512bw.cpp       2022-04-01 21:47:50.000000000 
+0200
+++ new/ugrep-3.7.9/lib/simd_avx512bw.cpp       2022-04-07 22:52:38.000000000 
+0200
@@ -45,7 +45,7 @@
   size_t n = 0;
 #if defined(HAVE_AVX512BW) && (!defined(_MSC_VER) || defined(_WIN64))
   __m512i vlcn = _mm512_set1_epi8('\n');
-  while (s + 63 <= e)
+  while (s + 64 <= e)
   {
     __m512i vlcm = _mm512_loadu_si512(reinterpret_cast<const __m512i*>(s));
     uint64_t mask = _mm512_cmpeq_epi8_mask(vlcm, vlcn);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.7.7/man/ugrep.1 new/ugrep-3.7.9/man/ugrep.1
--- old/ugrep-3.7.7/man/ugrep.1 2022-04-01 21:47:50.000000000 +0200
+++ new/ugrep-3.7.9/man/ugrep.1 2022-04-07 22:52:38.000000000 +0200
@@ -1,4 +1,4 @@
-.TH UGREP "1" "April 01, 2022" "ugrep 3.7.7" "User Commands"
+.TH UGREP "1" "April 07, 2022" "ugrep 3.7.9" "User Commands"
 .SH NAME
 \fBugrep\fR, \fBug\fR -- file pattern searcher
 .SH SYNOPSIS
@@ -212,7 +212,9 @@
 to specify a pattern after option \fB\-f\fR or after the FILE arguments.
 .TP
 \fB\-\-encoding\fR=\fIENCODING\fR
-The encoding format of the input, where ENCODING can be:
+The encoding format of the input.  The default ENCODING is binary
+and UTF\-8 which are the same.  Note that option \fB\-U\fR specifies binary
+PATTERN matching (text matching is the default.)  ENCODING can be:
 `binary', `ASCII', `UTF\-8', `UTF\-16',
 `UTF\-16BE', `UTF\-16LE', `UTF\-32', `UTF\-32BE',
 `UTF\-32LE', `LATIN1', `ISO\-8859\-1', `ISO\-8859\-2',
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.7.7/src/stats.cpp 
new/ugrep-3.7.9/src/stats.cpp
--- old/ugrep-3.7.7/src/stats.cpp       2022-04-01 21:47:50.000000000 +0200
+++ new/ugrep-3.7.9/src/stats.cpp       2022-04-07 22:52:38.000000000 +0200
@@ -43,20 +43,31 @@
 {
   size_t sf = searched_files();
   size_t sd = searched_dirs();
+  size_t sl = searched_lines();
   size_t ff = found_files();
   size_t fp = found_parts();
+  size_t fm = found_matches();
   size_t ws = warnings;
 
-  fprintf(output, "Searched %zu file%s", sf, (sf == 1 ? "" : "s"));
-  if (threads > 1)
-    fprintf(output, " with %zu threads", threads);
+  fprintf(output, NEWLINESTR "Searched %zu file%s", sf, (sf == 1 ? "" : "s"));
   if (sd > 0)
     fprintf(output, " in %zu director%s", sd, (sd == 1 ? "y" : "ies"));
-  fprintf(output, ": %zu matching", ff);
+  fprintf(output, " in %.3g seconds", 0.001 * reflex::timer_elapsed(timer));
+  if (threads > 1)
+    fprintf(output, " with %zu threads", threads);
+  fprintf(output, ": %zu matching (%.4g%%)", ff, 100.0 * ff / sf);
   if (fp > ff)
-    fprintf(output, " + %zu in archives" NEWLINESTR, fp - ff);
-  else
-    fprintf(output, NEWLINESTR);
+    fprintf(output, " + %zu in archives", fp - ff);
+  fprintf(output, NEWLINESTR);
+
+  if (fm > 0 && !flag_quiet && !flag_files_with_matches && 
!flag_files_without_match)
+  {
+    if (flag_ungroup || (flag_count && flag_only_matching))
+      fprintf(output, "Searched %zu line%s and found %zu matches (ungrouped)" 
NEWLINESTR, sl, (sl == 1 ? "" : "s"), fm);
+    else
+      fprintf(output, "Searched %zu line%s: %zu matching (%.4g%%)" NEWLINESTR, 
sl, (sl == 1 ? "" : "s"), fm, 100.0 * fm / sl);
+  }
+
   if (warnings > 0)
     fprintf(output, "Received %zu warning%s" NEWLINESTR, ws, ws == 1 ? "" : 
"s");
 
@@ -142,8 +153,11 @@
     fprintf(output, "  --exclude-dir='%s'%s" NEWLINESTR, i.c_str(), i.front() 
== '!' ? " (negated)" : "");
 }
 
+reflex::timer_type       Stats::timer;
 size_t                   Stats::files  = 0;
 size_t                   Stats::dirs   = 0;
 std::atomic_size_t       Stats::fileno;
 std::atomic_size_t       Stats::partno;
+std::atomic_size_t       Stats::matchno;
+std::atomic_size_t       Stats::lineno;
 std::vector<std::string> Stats::ignore;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.7.7/src/stats.hpp 
new/ugrep-3.7.9/src/stats.hpp
--- old/ugrep-3.7.7/src/stats.hpp       2022-04-01 21:47:50.000000000 +0200
+++ new/ugrep-3.7.9/src/stats.hpp       2022-04-07 22:52:38.000000000 +0200
@@ -47,6 +47,7 @@
 #define STATS_HPP
 
 #include "ugrep.hpp"
+#include <reflex/timer.h>
 
 // static class to collect global statistics
 class Stats {
@@ -56,10 +57,13 @@
   // reset stats
   static void reset()
   {
+    reflex::timer_start(timer);
     files = 0;
     dirs = 0;
     fileno = 0;
     partno = 0;
+    lineno = 0;
+    matchno = 0;
     ignore.clear();
   }
 
@@ -75,6 +79,13 @@
     ++dirs;
   }
 
+  // score matches
+  static void score_matches(size_t matches, size_t lines)
+  {
+    matchno += matches;
+    lineno += lines;
+  }
+
   // number of files searched
   static size_t searched_files()
   {
@@ -87,6 +98,12 @@
     return dirs;
   }
 
+  // number of lines searched
+  static size_t searched_lines()
+  {
+    return lineno;
+  }
+
   // atomically update the number of matching files found, excluding files in 
archives returns true if max file matches (+ number of threads-1 when sorting) 
is not reached yet
   static bool found_file()
   {
@@ -127,6 +144,12 @@
     return fileno > 0;
   }
 
+  // found matches
+  static size_t found_matches()
+  {
+    return matchno;
+  }
+
   // a .gitignore or similar file was encountered
   static void ignore_file(const std::string& filename)
   {
@@ -138,11 +161,14 @@
 
  protected:
 
-  static size_t                   files;  // number of files searched, 
excluding files in archives
-  static size_t                   dirs;   // number of directories searched
-  static std::atomic_size_t       fileno; // number of matching files, 
excluding files in archives, atomic for GrepWorker::search() update
-  static std::atomic_size_t       partno; // number of matching files, 
including files in archives, atomic for GrepWorker::search() update
-  static std::vector<std::string> ignore; // the .gitignore files encountered 
in the recursive search with --ignore-files
+  static reflex::timer_type       timer;   // elapsed wall-clock time in 
milliseconds (ms)
+  static size_t                   files;   // number of files searched, 
excluding files in archives
+  static size_t                   dirs;    // number of directories searched
+  static std::atomic_size_t       fileno;  // number of matching files, 
excluding files in archives, atomic for GrepWorker::search() update
+  static std::atomic_size_t       partno;  // number of matching files, 
including files in archives, atomic for GrepWorker::search() update
+  static std::atomic_size_t       lineno;  // number of lines searched 
cumulatively
+  static std::atomic_size_t       matchno; // number of matches found 
cumulatively
+  static std::vector<std::string> ignore;  // the .gitignore files encountered 
in the recursive search with --ignore-files
 
 };
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.7.7/src/ugrep.cpp 
new/ugrep-3.7.9/src/ugrep.cpp
--- old/ugrep-3.7.7/src/ugrep.cpp       2022-04-01 21:47:50.000000000 +0200
+++ new/ugrep-3.7.9/src/ugrep.cpp       2022-04-07 22:52:38.000000000 +0200
@@ -236,6 +236,9 @@
 
 #endif
 
+// unique identifier (address) for standard input path
+static const char *LABEL_STANDARD_INPUT = "(standard input)";
+
 // full home directory path
 const char *home_dir = NULL;
 
@@ -405,7 +408,7 @@
 const char *flag_format_open       = NULL;
 const char *flag_group_separator   = "--";
 const char *flag_hexdump           = NULL;
-const char *flag_label             = "(standard input)";
+const char *flag_label             = LABEL_STANDARD_INPUT;
 const char *flag_pager             = DEFAULT_PAGER;
 const char *flag_replace           = NULL;
 const char *flag_save_config       = NULL;
@@ -2937,7 +2940,7 @@
   // open a file for (binary) reading and assign input, decompress the file 
when -z, --decompress specified, may throw bad_alloc
   bool open_file(const char *pathname)
   {
-    if (pathname == NULL || *pathname == '\0')
+    if (pathname == LABEL_STANDARD_INPUT)
     {
       if (source == NULL)
         return false;
@@ -7438,7 +7441,7 @@
     Stats::score_file();
 
     // search standard input
-    search(NULL);
+    search(LABEL_STANDARD_INPUT);
   }
 
   if (arg_files.empty())
@@ -8341,7 +8344,7 @@
   }
 
   // pathname is NULL when stdin is searched
-  if (pathname == NULL || *pathname == '\0')
+  if (pathname == LABEL_STANDARD_INPUT)
     pathname = flag_label;
 
   bool colorize = flag_apply_color || flag_tag != NULL;
@@ -11059,6 +11062,8 @@
       // --break: add a line break when applicable
       if (flag_break && (matches > 0 || flag_any_line) && !flag_quiet && 
!flag_files_with_matches && !flag_count && flag_format == NULL)
         out.nl();
+
+      Stats::score_matches(matches, matcher->lineno() > 0 ? matcher->lineno() 
- 1 : 0);
     }
 
     catch (EXIT_SEARCH&)
@@ -11676,7 +11681,9 @@
             specify multiple patterns, when a pattern begins with a dash 
(`-'),\n\
             to specify a pattern after option -f or after the FILE 
arguments.\n\
     --encoding=ENCODING\n\
-            The encoding format of the input, where ENCODING can be:";
+            The encoding format of the input.  The default ENCODING is 
binary\n\
+            and UTF-8 which are the same.  Note that option -U specifies 
binary\n\
+            PATTERN matching (text matching is the default.)  ENCODING can 
be:";
   for (int i = 0; encoding_table[i].format != NULL; ++i)
     out << (i == 0 ? "" : ",") << (i % 4 ? " " : "\n            ") << "`" << 
encoding_table[i].format << "'";
   out << ".\n\
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.7.7/src/ugrep.hpp 
new/ugrep-3.7.9/src/ugrep.hpp
--- old/ugrep-3.7.7/src/ugrep.hpp       2022-04-01 21:47:50.000000000 +0200
+++ new/ugrep-3.7.9/src/ugrep.hpp       2022-04-07 22:52:38.000000000 +0200
@@ -38,7 +38,7 @@
 #define UGREP_HPP
 
 // ugrep version
-#define UGREP_VERSION "3.7.7"
+#define UGREP_VERSION "3.7.9"
 
 // disable mmap because mmap is almost always slower than the file reading 
speed improvements since 3.0.0
 #define WITH_NO_MMAP

commit ugrep for openSUSE:Factory

Reply via email to