On Sun, Jul 17, 2011 at 11:43:03AM -0400, Ted Unangst wrote:
> I recently learned that our grep does not support the \<\> syntax for
> word boundaries, only the somewhat more difficult to use [[:<:]] format.
> It's fairly easy to convert one to the other however.
>
if you do this, we will need to think carefully about how to document
it. grep(1) itself does not discuss REs, and instead points to
re_format(7). but you are proposing an extension to grep only.
can i ask why you want to support this? is it a gnu grep thing or
something?
jmc
> Index: grep.c
> ===================================================================
> RCS file: /home/tedu/cvs/src/usr.bin/grep/grep.c,v
> retrieving revision 1.44
> diff -u -p -r1.44 grep.c
> --- grep.c 8 Jul 2011 01:20:24 -0000 1.44
> +++ grep.c 17 Jul 2011 15:38:58 -0000
> @@ -163,6 +163,54 @@ struct option long_options[] =
> {NULL, no_argument, NULL, 0}
> };
>
> +#ifndef SMALL
> +char *
> +fix_word_boundaries(char *pat)
> +{
> + size_t newlen;
> + int bs, repl;
> + char c, *newpat, *p, *r;
> +
> + repl = 0;
> + p = pat;
> + while ((p = strstr(p, "\\<"))) {
> + p += 2;
> + repl++;
> + }
> + p = pat;
> + while ((p = strstr(p, "\\>"))) {
> + p += 2;
> + repl++;
> + }
> + if (!repl)
> + return pat;
> + newlen = strlen(pat) + 1 + repl * 5;
> + newpat = grep_malloc(newlen);
> + p = pat;
> + r = newpat;
> + bs = 0;
> + while ((c = *p++)) {
> + if (bs && (c == '<' || c == '>')) {
> + /* overwrite previous backslash */
> + snprintf(r-1, 8, "[[:%c:]]", c);
> + r += 6;
> + bs = 0;
> + continue;
> + } else if (!bs && c == '\\') {
> + bs = 1;
> + } else {
> + bs = 0;
> + }
> + *r++ = c;
> + }
> + *r = 0;
> + if (newlen <= strlen(newpat))
> + abort();
> + free(pat);
> + return newpat;
> +
> +}
> +#endif
>
> static void
> add_pattern(char *pat, size_t len)
> @@ -198,6 +246,12 @@ add_pattern(char *pat, size_t len)
> pattern[patterns] = grep_malloc(len + 1);
> memcpy(pattern[patterns], pat, len);
> pattern[patterns][len] = '\0';
> +#ifndef SMALL
> + if (!Fflag) {
> + pattern[patterns] = fix_word_boundaries(pattern[patterns]);
> + }
> +#endif
> +
> }
> ++patterns;
> }