Hello,

There are various programs, especially the ones written with GNU grep in
mind, that expect various flags that grep in base doesn't have.  While
some of the flags (like --color) can be easily worked around (i.e. by
patching/customising these programs), one thing that isn't easy to
work around is --null, because it alters the way grep outputs its data.

--null makes grep output an ASCII NUL byte after the file name, so a
program can parse the output of grep unambiguously even when the file
names contain "funny" characters, such as a newline.

GNU grep isn't the only one with a --null flag: FreeBSD and NetBSD
grep have it too (at least judging by their manpages), so it's somewhat
widespread.

I searched the archives on marc.info but I haven't seen a previous
discussion about this.

The following patch was tried against GNU grep (installed from packages)
and seems to behave consistently with it.

I used the same text as the FreeBSD/NetBSD manpage for the description
of the --null option, but I really dislike it: it feels way too verbose
for what it's trying to say, but I wasn't able to come up with something
better.

I'm not familiar at all with the grep codebase, so I hope I'm not
missing something.  If this has some chance of being accepted, I guess
I should also add some regress tests; I'll try to work on them soon, but
in the meantime I'm sending this.

Thanks,

Omar Polo


diff e992327fc31d0277a6f8518613a7db1b9face78b /home/op/w/openbsd-src
blob - 5cc228df222c54a0553f289b5da8bbbe6afd171e
file + usr.bin/grep/grep.1
--- usr.bin/grep/grep.1
+++ usr.bin/grep/grep.1
@@ -49,6 +49,7 @@
 .Op Fl -context Ns Op = Ns Ar num
 .Op Fl -label Ns = Ns Ar name
 .Op Fl -line-buffered
+.Op Fl -null
 .Op Ar pattern
 .Op Ar
 .Ek
@@ -297,6 +298,25 @@ instead of the filename before lines.
 Force output to be line buffered.
 By default, output is line buffered when standard output is a terminal
 and block buffered otherwise.
+.It Fl -null
+Output a zero byte (the ASCII NUL character) instead of the character
+that normally follows a file name.
+For example,
+.Nm Fl l Fl -null
+outputs a zero byte after each file name instead of the usual newline.
+This option makes the output unambiguous, even in the presence of file
+names containing unusual characters like newlines.
+This option can be used with commands like
+.Xr find 1
+.Fl print0 Ns ,
+.Xr perl 1
+.Fl 0 Ns ,
+.Xr sort 1
+.Fl z Ns , and
+.Xr xargs 1
+.Fl 0
+to process arbitrary file names, even those that contain newline
+characters.
 .El
 .Sh EXIT STATUS
 The
blob - f41b5e20ca68c9e9a36d2f7dd3c44329c621f29b
file + usr.bin/grep/grep.c
--- usr.bin/grep/grep.c
+++ usr.bin/grep/grep.c
@@ -80,6 +80,7 @@ int    vflag;         /* -v: only show non-matching lines */
 int     wflag;         /* -w: pattern must start and end on word boundaries */
 int     xflag;         /* -x: pattern must match entire line */
 int     lbflag;        /* --line-buffered */
+int     nullflag;      /* --null */
 const char *labelname; /* --label=name */
 
 int binbehave = BIN_FILE_BIN;
@@ -89,6 +90,7 @@ enum {
        HELP_OPT,
        MMAP_OPT,
        LINEBUF_OPT,
+       NULL_OPT,
        LABEL_OPT,
 };
 
@@ -134,6 +136,7 @@ static const struct option long_options[] =
        {"mmap",                no_argument,            NULL, MMAP_OPT},
        {"label",               required_argument,      NULL, LABEL_OPT},
        {"line-buffered",       no_argument,            NULL, LINEBUF_OPT},
+       {"null",                no_argument,            NULL, NULL_OPT},
        {"after-context",       required_argument,      NULL, 'A'},
        {"before-context",      required_argument,      NULL, 'B'},
        {"context",             optional_argument,      NULL, 'C'},
@@ -436,6 +439,9 @@ main(int argc, char *argv[])
                case LINEBUF_OPT:
                        lbflag = 1;
                        break;
+               case NULL_OPT:
+                       nullflag = 1;
+                       break;
                case HELP_OPT:
                default:
                        usage();
blob - b3d24ae662beb72c5632190c5c819bcc92f0389a
file + usr.bin/grep/grep.h
--- usr.bin/grep/grep.h
+++ usr.bin/grep/grep.h
@@ -68,7 +68,7 @@ extern int     cflags, eflags;
 extern int      Aflag, Bflag, Eflag, Fflag, Hflag, Lflag,
                 Rflag, Zflag,
                 bflag, cflag, hflag, iflag, lflag, mflag, nflag, oflag, qflag,
-                sflag, vflag, wflag, xflag;
+                sflag, vflag, wflag, xflag, nullflag;
 extern int      binbehave;
 extern const char *labelname;
 
blob - e16d08e7d859609c2ccbc0ac4bba670188f81abf
file + usr.bin/grep/util.c
--- usr.bin/grep/util.c
+++ usr.bin/grep/util.c
@@ -172,13 +172,13 @@ procfile(char *fn)
 
        if (cflag) {
                if (!hflag)
-                       printf("%s:", ln.file);
+                       printf("%s%c", ln.file, nullflag ? '\0' : ':');
                printf("%llu%s\n", c, overflow ? "+" : "");
        }
        if (lflag && c != 0)
-               printf("%s\n", fn);
+               printf("%s%c", fn, nullflag ? '\0' : '\n');
        if (Lflag && c == 0)
-               printf("%s\n", fn);
+               printf("%s%c", fn, nullflag ? '\0' : '\n');
        if (c && !cflag && !lflag && !Lflag &&
            binbehave == BIN_FILE_BIN && nottext && !qflag)
                printf("Binary file %s matches\n", fn);
@@ -266,9 +266,9 @@ print:
                        if (Bflag > 0)
                                printqueue();
                        linesqueued = 0;
-                       printline(l, ':', oflag ? &pmatch : NULL);
+                       printline(l, nullflag ? '\0' : ':', oflag ? &pmatch : NULL);
                } else {
-                       printline(l, '-', oflag ? &pmatch : NULL);
+                       printline(l, nullflag ? '\0' : '-', oflag ? &pmatch : NULL);
                        tail--;
                }
        }
@@ -660,7 +660,7 @@ printline(str_t *line, int sep, regmatch_t *pmatch)
        }
        if (nflag) {
                if (n)
-                       putchar(sep);
+                       putchar(nullflag ? '\0' : sep);
                printf("%lld", line->line_no);
                ++n;
        }

Reply via email to