Thanks, I pushed your patch (with a minor change to make it integrate
with the latest grep) and then pushed some fixes and one major
simplification: don't have any special case for "grep -iF PAT" when PAT
contains no alphabetics. This is rare enough that I expect it's not
worth complicating grep to worry about it.
I'm attaching the combined patch, i.e., the merge of your patch and
my changes.
diff --git a/src/grep.c b/src/grep.c
index 475cb1a..a1bccdb 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1904,6 +1904,45 @@ parse_grep_colors (void)
? (*(s) = wctob ((wint_t) (wc)), 1) \
: wcrtomb (s, wc, ps))
+/* Change a pattern for fgrep into grep.
+   KEYS points to LEN bytes of pattern text.  Store in *NEW_KEYS a
+   buffer obtained from xnmalloc that holds a copy of KEYS in which a
+   backslash is inserted before each byte that is handled as a
+   one-byte character (or as an invalid byte) and that appears in the
+   string "$*.[\^".  Store in *NEW_LEN the number of bytes written to
+   that buffer; no terminating NUL byte is appended.  Bytes consumed
+   as part of a longer multibyte character are copied unchanged.
+   NOTE(review): the case labels below assume MBRTOWC returns what
+   mbrtowc returns -- (size_t) -2 for an incomplete sequence,
+   (size_t) -1 for an invalid one, 0 for a NUL character, otherwise
+   the character's byte count -- confirm against the macro definition.
+   The caller in main stores *NEW_KEYS in its own variable after
+   freeing the old pattern, so it presumably owns the buffer.  */
+static void
+fgrep_to_grep_pattern (size_t len, char const *keys,
+ size_t *new_len, char **new_keys)
+{
+ char *p = *new_keys = xnmalloc (len + 1, 2); /* Presumably 2 * (LEN + 1) bytes, enough to escape every byte -- confirm xnmalloc semantics. */
+ mbstate_t mb_state = { 0 };
+ size_t n;
+
+ for (; len; keys += n, len -= n) /* N is set by the loop body: input bytes consumed this pass. */
+ {
+ wchar_t wc;
+ n = MBRTOWC (&wc, keys, len, &mb_state);
+ switch (n)
+ {
+ case (size_t) -2:
+ n = len; /* Treat all remaining input as one unit, which ends the loop. */
+ /* Fall through. */
+ default:
+ p = mempcpy (p, keys, n); /* Copy N bytes verbatim, without escaping. */
+ break;
+
+ case (size_t) -1:
+ memset (&mb_state, 0, sizeof mb_state); /* Reset the conversion state before continuing. */
+ /* Fall through. */
+ case 1:
+ *p = '\\'; /* Tentatively store a backslash ... */
+ p += strchr ("$*.[\\^", *keys) != NULL; /* ... and keep it only if *KEYS is in this list. */
+ /* Fall through. */
+ case 0:
+ *p++ = *keys; /* Copy the byte itself; case 0 enters here, so it is never escaped. */
+ n = 1; /* Consume exactly one input byte. */
+ break;
+ }
+ }
+
+ *new_len = p - *new_keys; /* Number of bytes written to the new buffer. */
+}
+
/* If the newline-separated regular expressions, KEYS (with length, LEN
and no trailing NUL byte), are amenable to transformation into
otherwise equivalent case-ignoring ones, perform the transformation,
@@ -2379,6 +2418,21 @@ main (int argc, char **argv)
else
usage (EXIT_TROUBLE);
+ /* If case-insensitive fgrep in a multibyte locale, improve
+ performance by using grep instead. */
+ if (match_icase && compile == Fcompile && MB_CUR_MAX > 1)
+ {
+ size_t new_keycc;
+ char *new_keys;
+ fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys);
+ free (keys);
+ keys = new_keys;
+ keycc = new_keycc;
+ matcher = "grep";
+ compile = Gcompile;
+ execute = EGexecute;
+ }
+
/* Case-insensitive matching is expensive in multibyte locales
because a few characters may change size when converted to upper
or lower case. To accommodate those, search the input one line