Thanks for the patch. I tweaked its commit message (see the first
attachment). While reviewing it I found opportunities to clarify and
simplify related code, so I did that too (see the second attachment). Both
patches are installed, and I am marking this bug report as done.
From a22980679ef993949bf443a9e255ce95a1147ea8 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Wed, 7 May 2014 17:55:45 +0900
Subject: [PATCH 1/2] grep: improve performance of -v when combined with -L, -l or -q
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Problem reported by Jörn Hees in: http://bugs.gnu.org/17427
* src/grep.c (grepbuf, grep): When -v is combined with -L, -l, or -q,
don't read data unnecessarily after a non-match is found.
---
src/grep.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/src/grep.c b/src/grep.c
index a661fc0..9db74d6 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1131,8 +1131,12 @@ grepbuf (char const *beg, char const *lim)
prtext (p, b, &n);
nlines += n;
outleft -= n;
- if (!outleft)
- return nlines;
+ if (!outleft || done_on_match)
+ {
+ if (exit_on_match)
+ exit (EXIT_SUCCESS);
+ return nlines;
+ }
}
p = endp;
}
@@ -1141,6 +1145,8 @@ grepbuf (char const *beg, char const *lim)
prtext (p, lim, &n);
nlines += n;
outleft -= n;
+ if (exit_on_match)
+ exit (EXIT_SUCCESS);
}
return nlines;
}
@@ -1219,8 +1225,7 @@ grep (int fd, struct stat const *st)
nlines += grepbuf (beg, lim);
if (pending)
prpending (lim);
- if ((!outleft && !pending)
- || (nlines && done_on_match && !out_invert))
+ if ((!outleft && !pending) || (nlines && done_on_match))
goto finish_grep;
}
--
1.9.0
From 821d8904e3cc53a68ce8c702de9f936665cfa223 Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Thu, 8 May 2014 09:17:57 -0700
Subject: [PATCH 2/2] grep: simplify and clarify invert-related code
* src/grep.c (out_invert, prtext): Use bool for booleans.
(prline): Remove unnecessary '!!' on a value that is always 0 or 1.
(prtext): Remove last arg NLINESP; use !out_invert instead. All uses
changed. Move decls to nearer uses, since we can assume C99 here.
Update 'outleft' and 'after_last_match' here; it's simpler.
(grepbuf): Compute return value by subtracting new from old 'outleft',
rather than by keeping a separate running total. Avoid code duplication
by arranging for prtext to be called from one place, not three.
---
src/grep.c | 112 +++++++++++++++++++++++++++----------------------------------
1 file changed, 49 insertions(+), 63 deletions(-)
diff --git a/src/grep.c b/src/grep.c
index 9db74d6..1e3fc28 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -663,7 +663,7 @@ static enum
static int filename_mask; /* If zero, output nulls after filenames. */
static int out_quiet; /* Suppress all normal output. */
-static int out_invert; /* Print nonmatching stuff. */
+static bool out_invert; /* Print nonmatching stuff. */
static int out_file; /* Print filenames. */
static int out_line; /* Print line numbers. */
static int out_byte; /* Print byte offsets. */
@@ -912,7 +912,7 @@ prline (char const *beg, char const *lim, int sep)
if (!only_matching)
print_line_head (beg, lim, sep);
- matching = (sep == SEP_CHAR_SELECTED) ^ !!out_invert;
+ matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
if (color_option)
{
@@ -977,34 +977,31 @@ prpending (char const *lim)
}
}
-/* Print the lines between BEG and LIM. Deal with context crap.
- If NLINESP is non-null, store a count of lines between BEG and LIM. */
+/* Output the lines between BEG and LIM. Deal with context. */
static void
-prtext (char const *beg, char const *lim, intmax_t *nlinesp)
+prtext (char const *beg, char const *lim)
{
- static int used; /* avoid printing SEP_STR_GROUP before any output */
- char const *bp, *p;
+ static bool used; /* Avoid printing SEP_STR_GROUP before any output. */
char eol = eolbyte;
- intmax_t i, n;
if (!out_quiet && pending > 0)
prpending (beg);
- p = beg;
+ char const *p = beg;
if (!out_quiet)
{
- /* Deal with leading context crap. */
-
- bp = lastout ? lastout : bufbeg;
+ /* Deal with leading context. */
+ char const *bp = lastout ? lastout : bufbeg;
+ intmax_t i;
for (i = 0; i < out_before; ++i)
if (p > bp)
do
--p;
while (p[-1] != eol);
- /* We print the SEP_STR_GROUP separator only if our output is
- discontiguous from the last output in the file. */
+ /* Print the group separator unless the output is adjacent to
+ the previous output in the file. */
if ((out_before || out_after) && used && p != lastout && group_separator)
{
pr_sgr_start_if (sep_color);
@@ -1022,9 +1019,10 @@ prtext (char const *beg, char const *lim, intmax_t *nlinesp)
}
}
- if (nlinesp)
+ intmax_t n;
+ if (out_invert)
{
- /* Caller wants a line count. */
+ /* One or more lines are output. */
for (n = 0; p < lim && n < outleft; n++)
{
char const *nl = memchr (p, eol, lim - p);
@@ -1033,16 +1031,20 @@ prtext (char const *beg, char const *lim, intmax_t *nlinesp)
prline (p, nl, SEP_CHAR_SELECTED);
p = nl;
}
- *nlinesp = n;
-
- /* relying on it that this function is never called when outleft = 0. */
- after_last_match = bufoffset - (buflim - p);
}
- else if (!out_quiet)
- prline (beg, lim, SEP_CHAR_SELECTED);
+ else
+ {
+ /* Just one line is output. */
+ if (!out_quiet)
+ prline (beg, lim, SEP_CHAR_SELECTED);
+ n = 1;
+ p = lim;
+ }
+ after_last_match = bufoffset - (buflim - p);
pending = out_quiet ? 0 : out_after;
- used = 1;
+ used = true;
+ outleft -= n;
}
/* Invoke the matcher, EXECUTE, on buffer BUF of SIZE bytes. If there
@@ -1098,57 +1100,41 @@ do_execute (char const *buf, size_t size, size_t *match_size,
static intmax_t
grepbuf (char const *beg, char const *lim)
{
- intmax_t nlines, n;
+ intmax_t outleft0 = outleft;
char const *p;
- size_t match_offset;
- size_t match_size;
+ char const *endp;
- nlines = 0;
- p = beg;
- while ((match_offset = do_execute (p, lim - p, &match_size,
- NULL)) != (size_t) -1)
+ for (p = beg; p < lim; p = endp)
{
+ size_t match_size;
+ size_t match_offset = do_execute (p, lim - p, &match_size, NULL);
+ if (match_offset == (size_t) -1)
+ {
+ if (!out_invert)
+ break;
+ match_offset = lim - p;
+ match_size = 0;
+ }
char const *b = p + match_offset;
- char const *endp = b + match_size;
+ endp = b + match_size;
/* Avoid matching the empty line at the end of the buffer. */
- if (b == lim)
+ if (!out_invert && b == lim)
break;
- if (!out_invert)
- {
- prtext (b, endp, NULL);
- nlines++;
- outleft--;
- if (!outleft || done_on_match)
- {
- if (exit_on_match)
- exit (EXIT_SUCCESS);
- after_last_match = bufoffset - (buflim - endp);
- return nlines;
- }
- }
- else if (p < b)
+ if (!out_invert || p < b)
{
- prtext (p, b, &n);
- nlines += n;
- outleft -= n;
+ char const *prbeg = out_invert ? p : b;
+ char const *prend = out_invert ? b : endp;
+ prtext (prbeg, prend);
if (!outleft || done_on_match)
{
if (exit_on_match)
exit (EXIT_SUCCESS);
- return nlines;
+ break;
}
}
- p = endp;
- }
- if (out_invert && p < lim)
- {
- prtext (p, lim, &n);
- nlines += n;
- outleft -= n;
- if (exit_on_match)
- exit (EXIT_SUCCESS);
}
- return nlines;
+
+ return outleft0 - outleft;
}
/* Search a given file. Normally, return a count of lines printed;
@@ -1242,7 +1228,7 @@ grep (int fd, struct stat const *st)
while (beg[-1] != eol);
}
- /* detect if leading context is discontinuous from last printed line. */
+ /* Detect whether leading context is adjacent to previous output. */
if (beg != lastout)
lastout = 0;
@@ -2177,7 +2163,7 @@ main (int argc, char **argv)
break;
case 'v':
- out_invert = 1;
+ out_invert = true;
break;
case 'w':
@@ -2322,7 +2308,7 @@ main (int argc, char **argv)
if (keycc == 0)
{
/* No keys were specified (e.g. -f /dev/null). Match nothing. */
- out_invert ^= 1;
+ out_invert ^= true;
match_lines = match_words = 0;
}
else
--
1.9.0