Hi Paul,
On 2015-06-18 13:25, Paul Tan wrote:
> diff --git a/builtin/am.c b/builtin/am.c
> index e9a3687..7b97ea8 100644
> --- a/builtin/am.c
> +++ b/builtin/am.c
> @@ -121,6 +121,96 @@ static void am_destroy(const struct am_state *state)
> strbuf_release(&sb);
> }
>
> +/*
> + * Returns 1 if the file looks like a piece of email a-la RFC2822, 0
> otherwise.
> + * We check this by grabbing all the non-indented lines and seeing if they
> look
> + * like they begin with valid header field names.
> + */
> +static int is_email(const char *filename)
> +{
> + struct strbuf sb = STRBUF_INIT;
> + FILE *fp = xfopen(filename, "r");
> + int ret = 1;
> +
> + while (!strbuf_getline(&sb, fp, '\n')) {
> + const char *x;
> +
> + strbuf_rtrim(&sb);
> +
> + if (!sb.len)
> + break; /* End of header */
> +
> + /* Ignore indented folded lines */
> + if (*sb.buf == '\t' || *sb.buf == ' ')
> + continue;
> +
> + /* It's a header if it matches the regexp "^[!-9;-~]+:" */
Why not just compile a regex and use it here? We use regexes elsewhere anyway...
> +/**
> + * Attempts to detect the patch_format of the patches contained in `paths`,
> + * returning the PATCH_FORMAT_* enum value. Returns PATCH_FORMAT_UNKNOWN if
> + * detection fails.
> + */
> +static int detect_patch_format(struct string_list *paths)
> +{
> + enum patch_format ret = PATCH_FORMAT_UNKNOWN;
> + struct strbuf l1 = STRBUF_INIT;
> + struct strbuf l2 = STRBUF_INIT;
> + struct strbuf l3 = STRBUF_INIT;
> + FILE *fp;
> +
> + /*
> + * We default to mbox format if input is from stdin and for directories
> + */
> + if (!paths->nr || !strcmp(paths->items->string, "-") ||
> + is_directory(paths->items->string)) {
> + ret = PATCH_FORMAT_MBOX;
> + goto done;
> + }
> +
> + /*
> + * Otherwise, check the first 3 lines of the first patch, starting
> + * from the first non-blank line, to try to detect its format.
> + */
> + fp = xfopen(paths->items->string, "r");
> + while (!strbuf_getline(&l1, fp, '\n')) {
> + strbuf_trim(&l1);
> + if (l1.len)
> + break;
> + }
> + strbuf_getline(&l2, fp, '\n');
We should test the return value of `strbuf_getline()`; if EOF was reached
already, `strbuf_getwholeline()` does not touch the strbuf. I know, the strbuf
is still initialized empty here, but it is too easy to forget when changing
this code.
> + strbuf_trim(&l2);
> + strbuf_getline(&l3, fp, '\n');
> + strbuf_trim(&l3);
> + fclose(fp);
> +
> + if (starts_with(l1.buf, "From ") || starts_with(l1.buf, "From: "))
> + ret = PATCH_FORMAT_MBOX;
Hmm. We can test that earlier and return without reading from the file any
further, I think.
> + else if (l1.len && l2.len && l3.len && is_email(paths->items->string))
> + ret = PATCH_FORMAT_MBOX;
Maybe we can do better than this by folding the `is_email() function into this
here function, reusing the same strbuf to read the lines and keeping track of
the email header lines we saw... I would really like to avoid opening the same
file twice just to figure out whether it is in email format.
The rest looks very nice!
Dscho
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html