Within the processing of the commit message, check for a scissors line
or a patchbreak line first (before checking for in-body headers) so that
a subsequent patch modifying the processing of in-body headers would not
cause a scissors line or patchbreak line to be misidentified.

If a line could be both an in-body header and a scissors line (for
example, "From: -- >8 --"), this is considered a fatal error
(previously, it would be interpreted as an in-body header).  (It is not
possible for a line to be both an in-body header and a patchbreak line,
since both require different prefixes.)

The following enumeration shows that processing is unchanged except for
the in-body header + scissors line case (as described above).

o in-body header (check_header OK)
  o passes UTF-8 conversion
    o [described above] is scissors line
    o [not possible] is patchbreak line
    o [not possible] is blank line
    o is none of the above - processed as header
  o fails UTF-8 conversion - processed as header
o not in-body header
  o passes UTF-8 conversion
    o is scissors line - processed as scissors
    o is patchbreak line - processed as patchbreak
    o is blank line - ignored if in header_stage
    o is none of the above - log message
  o fails UTF-8 conversion - input error

As for the result left in "line" (after the invocation of
handle_commit_msg), it is unused (by its caller, handle_filter, and by
handle_filter's callers, handle_boundary and handle_body) unless this
line is a patchbreak line, in which case handle_patch is subsequently
called (in handle_filter) on "line". In this case, "line" must have
passed UTF-8 conversion both before and after this patch, so the result
is still the same overall.

Signed-off-by: Jonathan Tan <jonathanta...@google.com>
---
 mailinfo.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 115 insertions(+), 30 deletions(-)

diff --git a/mailinfo.c b/mailinfo.c
index e19abe3..23a56c2 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -340,23 +340,56 @@ static struct strbuf *decode_b_segment(const struct strbuf *b_seg)
        return out;
 }
 
-static int convert_to_utf8(struct mailinfo *mi,
-                          struct strbuf *line, const char *charset)
+/*
+ * Attempts to convert line into UTF-8, storing the result in line.
+ *
+ * This differs from convert_to_utf8 in that a failed conversion is not
+ * treated as an error: mi->input_error is not set, and no error message
+ * is printed.
+ *
+ * If the conversion is unnecessary, returns 0 and stores NULL in old_buf (if
+ * old_buf is not NULL).
+ *
+ * If the conversion is successful, returns 0 and stores the unconverted string
+ * in old_buf and old_len (if they are respectively not NULL).
+ *
+ * If the conversion is unsuccessful, returns -1.
+ */
+static int try_convert_to_utf8(const struct mailinfo *mi, struct strbuf *line,
+                              const char *charset, char **old_buf,
+                              size_t *old_len)
 {
-       char *out;
+       char *utf8;
 
-       if (!mi->metainfo_charset || !charset || !*charset)
+       if (!mi->metainfo_charset || !charset || !*charset ||
+           same_encoding(mi->metainfo_charset, charset)) {
+               if (old_buf)
+                       *old_buf = NULL;
                return 0;
+       }
 
-       if (same_encoding(mi->metainfo_charset, charset))
+       utf8 = reencode_string(line->buf, mi->metainfo_charset, charset);
+       if (utf8) {
+               char *temp = strbuf_detach(line, old_len);
+               if (old_buf)
+                       *old_buf = temp;
+               strbuf_attach(line, utf8, strlen(utf8), strlen(utf8));
                return 0;
-       out = reencode_string(line->buf, mi->metainfo_charset, charset);
-       if (!out) {
+       }
+       return -1;
+}
+
+/*
+ * Converts line into UTF-8, setting mi->input_error to -1 upon failure.
+ */
+static int convert_to_utf8(struct mailinfo *mi,
+                          struct strbuf *line, const char *charset)
+{
+       if (try_convert_to_utf8(mi, line, charset, NULL, NULL)) {
                mi->input_error = -1;
                return error("cannot convert from %s to %s",
                             charset, mi->metainfo_charset);
        }
-       strbuf_attach(line, out, strlen(out), strlen(out));
        return 0;
 }
 
@@ -515,6 +548,13 @@ static int check_header(struct mailinfo *mi,
        return ret;
 }
 
+static int check_header_raw(struct mailinfo *mi,
+                           char *buf, size_t len,
+                           struct strbuf *hdr_data[], int overwrite) {
+       const struct strbuf sb = {0, len, buf};
+       return check_header(mi, &sb, hdr_data, overwrite);
+}
+
 static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
 {
        struct strbuf *ret;
@@ -623,32 +663,48 @@ static int is_scissors_line(const struct strbuf *line)
                gap * 2 < perforation);
 }
 
-static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
+static int resembles_rfc2822_header(const struct strbuf *line)
 {
-       assert(!mi->filter_stage);
+       char *c;
 
-       if (mi->header_stage) {
-               if (!line->len || (line->len == 1 && line->buf[0] == '\n'))
+       if (!isalpha(line->buf[0]))
+               return 0;
+
+       for (c = line->buf + 1; *c != 0; c++) {
+               if (*c == ':')
+                       return 1;
+               else if (*c != '-' && !isalpha(*c))
                        return 0;
        }
+       return 0;
+}
 
-       if (mi->use_inbody_headers && mi->header_stage) {
-               mi->header_stage = check_header(mi, line, mi->s_hdr_data, 0);
-               if (mi->header_stage)
-                       return 0;
-       } else
-               /* Only trim the first (blank) line of the commit message
-                * when ignoring in-body headers.
-                */
-               mi->header_stage = 0;
+static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
+{
+       int ret = 0;
+       int utf8_result;
+       char *old_buf;
+       size_t old_len;
+
+       assert(!mi->filter_stage);
 
-       /* normalize the log message to UTF-8. */
-       if (convert_to_utf8(mi, line, mi->charset.buf))
-               return 0; /* mi->input_error already set */
+       /*
+        * Obtain UTF8 for scissors line and patchbreak checks, but retain the
+        * undecoded line in case we need to process it as an in-body header.
+        */
+       utf8_result = try_convert_to_utf8(mi, line, mi->charset.buf, &old_buf,
+                                         &old_len);
 
-       if (mi->use_scissors && is_scissors_line(line)) {
+       if (!utf8_result && mi->use_scissors && is_scissors_line(line)) {
                int i;
 
+               if (resembles_rfc2822_header(line))
+                       /*
+                        * Explicitly reject scissors lines that resemble an RFC
+                        * 2822 header, since such a line is ambiguous.
+                        */
+                       die("scissors line resembles RFC 2822 header");
+
                strbuf_setlen(&mi->log_message, 0);
                mi->header_stage = 1;
 
@@ -661,18 +717,47 @@ static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
                                strbuf_release(mi->s_hdr_data[i]);
                        mi->s_hdr_data[i] = NULL;
                }
-               return 0;
+               goto handle_commit_msg_out;
        }
-
-       if (patchbreak(line)) {
+       if (!utf8_result && patchbreak(line)) {
                if (mi->message_id)
                        strbuf_addf(&mi->log_message,
                                    "Message-Id: %s\n", mi->message_id);
-               return 1;
+               ret = 1;
+               goto handle_commit_msg_out;
        }
 
+       if (mi->header_stage) {
+               char *buf = old_buf ? old_buf : line->buf;
+               if (buf[0] == 0 || (buf[0] == '\n' && buf[1] == 0))
+                       goto handle_commit_msg_out;
+       }
+
+       if (mi->use_inbody_headers && mi->header_stage) {
+               char *buf = old_buf ? old_buf : line->buf;
+               size_t len = old_buf ? old_len : line->len;
+               mi->header_stage = check_header_raw(mi, buf, len,
+                                                   mi->s_hdr_data, 0);
+               if (mi->header_stage)
+                       goto handle_commit_msg_out;
+       } else
+               /* Only trim the first (blank) line of the commit message
+                * when ignoring in-body headers.
+                */
+               mi->header_stage = 0;
+
+       /* If adding as a log message, conversion to UTF-8 is required. */
+       if (utf8_result) {
+               mi->input_error = -1;
+               error("cannot convert from %s to %s",
+                     mi->charset.buf, mi->metainfo_charset);
+               goto handle_commit_msg_out;
+       }
        strbuf_addbuf(&mi->log_message, line);
-       return 0;
+
+handle_commit_msg_out:
+       free(old_buf);
+       return ret;
 }
 
 static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
-- 
2.10.0.rc2.20.g5b18e70

Reply via email to