On Sun, May 21, 2017 at 1:50 AM, Junio C Hamano <gits...@pobox.com> wrote: > Ævar Arnfjörð Bjarmason <ava...@gmail.com> writes: > >> Make the --regexp-ignore-case option work with --perl-regexp. This >> never worked, and there was no test for this. Fix the bug and add a >> test. >> >> When PCRE support was added in commit 63e7e9d8b6 ("git-grep: Learn >> PCRE", 2011-05-09) compile_pcre_regexp() would only check >> opt->ignore_case, but when the --perl-regexp option was added in >> commit 727b6fc3ed ("log --grep: accept --basic-regexp and >> --perl-regexp", 2012-10-03) the code didn't set the opt->ignore_case. >> >> Change the test suite to test for -i and --invert-regexp with >> basic/extended/perl patterns in addition to fixed, which was the only >> patternType that was tested for before in combination with those >> options. >> >> Signed-off-by: Ævar Arnfjörð Bjarmason <ava...@gmail.com> >> --- >> revision.c | 1 + >> t/t4202-log.sh | 60 >> +++++++++++++++++++++++++++++++++++++++++++++++++++++----- >> 2 files changed, 56 insertions(+), 5 deletions(-) >> >> diff --git a/revision.c b/revision.c >> index 8a8c1789c7..4883cdd2d0 100644 >> --- a/revision.c >> +++ b/revision.c >> @@ -1991,6 +1991,7 @@ static int handle_revision_opt(struct rev_info *revs, >> int argc, const char **arg >> } else if (!strcmp(arg, "--extended-regexp") || !strcmp(arg, "-E")) { >> revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_ERE; >> } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) >> { >> + revs->grep_filter.ignore_case = 1; >> revs->grep_filter.regflags |= REG_ICASE; >> DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE); >> } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) { > > Looks good. > > I however wonder if it is a better approach in the longer term to > treat the .ignore_case field just like .extended_regexp_option > field, i.e. 
not committing immediately to .regflags but committing it > after config and command line parsing is done, just like we make the > "BRE? ERE?" decision in grep_commit_pattern_type().
I started hacking up a patch to fix the root cause of this, i.e. the users of the grep API should only set `.ignore_case = 1` and not care about setting regflags, but it was more than a trivial change, so I didn't include it in this series: diff --git a/builtin/grep.c b/builtin/grep.c index 3ffb5b4e81..be28c37265 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1151,8 +1151,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!opt.pattern_list) die(_("no pattern given.")); - if (!opt.fixed && opt.ignore_case) - opt.regflags |= REG_ICASE; compile_grep_patterns(&opt); diff --git a/grep.c b/grep.c index 47cee45067..7b13ee1043 100644 --- a/grep.c +++ b/grep.c @@ -435,12 +435,11 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { - int icase, ascii_only; + int ascii_only; int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - icase = opt->regflags & REG_ICASE || p->ignore_case; ascii_only = !has_non_ascii(p->pattern); /* @@ -456,12 +455,12 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) * want to use kws. */ if (opt->fixed || is_fixed(p->pattern, p->patternlen)) - p->fixed = !icase || ascii_only; + p->fixed = !p->ignore_case || ascii_only; else p->fixed = 0; if (p->fixed) { - p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL); + p->kws = kwsalloc(p->ignore_case ? 
tolower_trans_tbl : NULL); kwsincr(p->kws, p->pattern, p->patternlen); kwsprep(p->kws); return; @@ -480,6 +479,8 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) return; } + if (p->ignore_case) + opt->regflags |= REG_ICASE; err = regcomp(&p->regexp, p->pattern, opt->regflags); if (err) { char errbuf[1024]; diff --git a/revision.c b/revision.c index 4883cdd2d0..30c23a1098 100644 --- a/revision.c +++ b/revision.c @@ -1992,7 +1992,6 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_ERE; } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) { revs->grep_filter.ignore_case = 1; - revs->grep_filter.regflags |= REG_ICASE; DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE); } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) { revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_FIXED; But an even better solution is to get rid of passing the regflags field in grep_opt entirely, but this conflicts with some of my later patches: diff --git a/builtin/grep.c b/builtin/grep.c index 3ffb5b4e81..be28c37265 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1151,8 +1151,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!opt.pattern_list) die(_("no pattern given.")); - if (!opt.fixed && opt.ignore_case) - opt.regflags |= REG_ICASE; compile_grep_patterns(&opt); diff --git a/grep.c b/grep.c index 47cee45067..1bde7037ba 100644 --- a/grep.c +++ b/grep.c @@ -34,7 +34,6 @@ void init_grep_defaults(void) memset(opt, 0, sizeof(*opt)); opt->relative = 1; opt->pathname = 1; - opt->regflags = REG_NEWLINE; opt->max_depth = -1; opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED; opt->extended_regexp_option = 0; @@ -156,7 +155,6 @@ void grep_init(struct grep_opt *opt, const char *prefix) opt->linenum = def->linenum; opt->max_depth = def->max_depth; opt->pathname = def->pathname; - opt->regflags = def->regflags; opt->relative 
= def->relative; opt->output = def->output; @@ -179,25 +177,25 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st case GREP_PATTERN_TYPE_BRE: opt->fixed = 0; opt->pcre = 0; - opt->regflags &= ~REG_EXTENDED; + opt->extended = 0; break; case GREP_PATTERN_TYPE_ERE: opt->fixed = 0; opt->pcre = 0; - opt->regflags |= REG_EXTENDED; + opt->extended = 1; break; case GREP_PATTERN_TYPE_FIXED: opt->fixed = 1; opt->pcre = 0; - opt->regflags &= ~REG_EXTENDED; + opt->extended = 0; break; case GREP_PATTERN_TYPE_PCRE: opt->fixed = 0; opt->pcre = 1; - opt->regflags &= ~REG_EXTENDED; + opt->extended = 0; break; } } @@ -415,10 +413,9 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) { struct strbuf sb = STRBUF_INIT; int err; - int regflags; + int regflags = REG_NEWLINE; basic_regex_quote_buf(&sb, p->pattern); - regflags = opt->regflags & ~REG_EXTENDED; if (opt->ignore_case) regflags |= REG_ICASE; err = regcomp(&p->regexp, sb.buf, regflags); @@ -435,12 +432,12 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { - int icase, ascii_only; + int ascii_only; int err; + int regflags = REG_NEWLINE; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - icase = opt->regflags & REG_ICASE || p->ignore_case; ascii_only = !has_non_ascii(p->pattern); /* @@ -456,12 +453,12 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) * want to use kws. */ if (opt->fixed || is_fixed(p->pattern, p->patternlen)) - p->fixed = !icase || ascii_only; + p->fixed = !p->ignore_case || ascii_only; else p->fixed = 0; if (p->fixed) { - p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL); + p->kws = kwsalloc(p->ignore_case ? 
tolower_trans_tbl : NULL); kwsincr(p->kws, p->pattern, p->patternlen); kwsprep(p->kws); return; @@ -480,7 +477,11 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) return; } - err = regcomp(&p->regexp, p->pattern, opt->regflags); + if (p->ignore_case) + regflags |= REG_ICASE; + if (opt->extended) + regflags |= REG_EXTENDED; + err = regcomp(&p->regexp, p->pattern, regflags); if (err) { char errbuf[1024]; regerror(err, &p->regexp, errbuf, 1024); diff --git a/grep.h b/grep.h index 267534ca24..d9d603deb1 100644 --- a/grep.h +++ b/grep.h @@ -129,7 +129,6 @@ struct grep_opt { char color_match_selected[COLOR_MAXLEN]; char color_selected[COLOR_MAXLEN]; char color_sep[COLOR_MAXLEN]; - int regflags; unsigned pre_context; unsigned post_context; unsigned last_shown; diff --git a/revision.c b/revision.c index 4883cdd2d0..67240d38af 100644 --- a/revision.c +++ b/revision.c @@ -1362,7 +1362,6 @@ void init_revisions(struct rev_info *revs, const char *prefix) init_grep_defaults(); grep_init(&revs->grep_filter, prefix); revs->grep_filter.status_only = 1; - revs->grep_filter.regflags = REG_NEWLINE; diff_setup(&revs->diffopt); if (prefix && !revs->diffopt.prefix) { @@ -1992,7 +1991,6 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_ERE; } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) { revs->grep_filter.ignore_case = 1; - revs->grep_filter.regflags |= REG_ICASE; DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE); } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) { revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_FIXED; But as all this code cleanup isn't needed for fixing this bug, and I'd really like to get this series merged into next/master ASAP so I can start submitting the grep/pcre patches that are actually interesting, let's leave this orthogonal code cleanup for now.