Given the following test case: #define f(x, y) "x y" extern void abort (void); int main () { const char *str1 = f("a", "\"a\""); const char *str2 = f( \t, " \t"); if (strcmp (str1, "\"a\" \"\\\"a\\\"\"")) abort (); if (strcmp (str2, "\t \" \\t\"")) abort (); return 0; }
Gcc 2.95.3 will accept it and do the right thing: $ gcc -v Reading specs from /opt/local/fsf/lib/gcc-lib/i686-pc-linux-gnu/2.95.3/specs gcc version 2.95.3 20010315 (release) $ gcc -traditional-cpp -o macroargs macroargs.c $ ./macroargs $ Current gcc 4.X fails to accept this code. For example, using the Fedora Core 6 compiler: $ /usr/bin/gcc -v Using built-in specs. Target: i386-redhat-linux Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-libgcj-multifile --enable-languages=c,c++,objc,obj-c++,java,fortran,ada --enable-java-awt=gtk --disable-dssi --enable-plugin --with-java-home=/usr/lib/jvm/java-1.4.2-gcj-1.4.2.0/jre --with-cpu=generic --host=i386-redhat-linux Thread model: posix gcc version 4.1.1 20061011 (Red Hat 4.1.1-30) $ /usr/bin/gcc -traditional-cpp -o macroargs macroargs.c macroargs.c: In function ‘main’: macroargs.c:7: error: expected ‘,’ or ‘;’ before ‘a’ macroargs.c:7: error: stray ‘\’ in program macroargs.c:7: error: missing terminating " character macroargs.c:8: error: stray ‘\’ in program macroargs.c:12: error: ‘str2’ undeclared (first use in this function) macroargs.c:12: error: (Each undeclared identifier is reported only once macroargs.c:12: error: for each function it appears in.) The attached patch (also posted to gcc-patches) will fix it, although its handling of quoted arguments is not exactly identical to gcc 2.95.3. Notice the difference between the test case above, and the test case included with the patch. Here is an example of a patched gcc's behavior: $ /opt/specifix/experimental/bin/gcc -v Using built-in specs. 
Target: i686-pc-linux-gnu Configured with: /src/specifix/experimental/src/gcc/configure --enable-languages=c,c++,fortran --prefix=/opt/specifix/experimental --disable-werror --disable-bootstrap --with-mpfr=/opt/specifix/experimental --with-gmp=/opt/specifix/experimental --cache-file=/dev/null --srcdir=/src/specifix/experimental/src/gcc Thread model: posix gcc version 4.3.0 20061231 (experimental) $ /opt/specifix/experimental/bin/gcc -traditional-cpp -o macroargs macroargs.c $ Note however that the above test case WILL abort if run, while the test case included with the patch will not, due to the handling of leading and trailing whitespace in macro args. See the code inside "#if 0 ... #endif" in the patch and the associated comments. Either the current gcc testsuite is wrong in how it tests for this whitespace, or gcc 2.95.3 is wrong. It's not clear to me which behavior is more correct. -Fred
Index: gcc/gcc/testsuite/ChangeLog.specifix =================================================================== RCS file: gcc/gcc/testsuite/ChangeLog.specifix diff -N gcc/gcc/testsuite/ChangeLog.specifix --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ gcc/gcc/testsuite/ChangeLog.specifix 13 Aug 2006 20:59:50 -0000 @@ -0,0 +1,4 @@ +2006-08-11 Fred Fish <[EMAIL PROTECTED]> + + * gcc.dg/cpp/trad/macroargs.c: Add code to test quoting in + macro expansions. Index: gcc/gcc/testsuite/gcc.dg/cpp/trad/macroargs.c =================================================================== RCS file: /cvsroots/latest/src/gcc/gcc/testsuite/gcc.dg/cpp/trad/macroargs.c,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 macroargs.c --- gcc/gcc/testsuite/gcc.dg/cpp/trad/macroargs.c 8 Oct 2005 18:59:16 -0000 1.1.1.1 +++ gcc/gcc/testsuite/gcc.dg/cpp/trad/macroargs.c 13 Aug 2006 20:58:12 -0000 @@ -8,6 +8,17 @@ extern void abort (void); +void testquoting () +{ + const char *str1 = f("a", "\"a\""); + const char *str2 = f( \t, " \t"); + + if (strcmp (str1, "\"a\" \"\\\"a\\\"\"")) + abort (); + if (strcmp (str2, " \t \" \\t\"")) + abort (); +} + int main () { const char *str1 = f( foo ,bar); @@ -26,5 +37,7 @@ foo , 2"), "1 , 2")) abort (); + testquoting (); + return 0; } Index: gcc/libcpp/ChangeLog.specifix =================================================================== RCS file: gcc/libcpp/ChangeLog.specifix diff -N gcc/libcpp/ChangeLog.specifix --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ gcc/libcpp/ChangeLog.specifix 13 Aug 2006 20:59:01 -0000 @@ -0,0 +1,7 @@ +2006-08-09 Fred Fish <[EMAIL PROTECTED]> + + * traditional.c (replace_args_and_push): Add local variable + cxtquote, calculate the replacement text size assuming a + worst case of every input character quoted with backslash, + and properly handle output quoting of quote characters in + actual arguments used in function-like macros. 
Index: gcc/libcpp/traditional.c =================================================================== RCS file: /cvsroots/latest/src/gcc/libcpp/traditional.c,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 traditional.c --- gcc/libcpp/traditional.c 8 Oct 2005 19:00:54 -0000 1.1.1.1 +++ gcc/libcpp/traditional.c 13 Aug 2006 20:58:12 -0000 @@ -827,8 +827,11 @@ replace_args_and_push (cpp_reader *pfile uchar *p; _cpp_buff *buff; size_t len = 0; + int cxtquote = 0; - /* Calculate the length of the argument-replaced text. */ + /* Get an estimate of the length of the argument-replaced text. + This is a worst case estimate, assuming that every replacement + text character needs quoting. */ for (exp = macro->exp.text;;) { struct block *b = (struct block *) exp; @@ -836,7 +839,7 @@ replace_args_and_push (cpp_reader *pfile len += b->text_len; if (b->arg_index == 0) break; - len += (fmacro->args[b->arg_index] + len += 2 * (fmacro->args[b->arg_index] - fmacro->args[b->arg_index - 1] - 1); exp += BLOCK_LEN (b->text_len); } @@ -845,21 +848,69 @@ replace_args_and_push (cpp_reader *pfile buff = _cpp_get_buff (pfile, len + 1); /* Copy the expansion and replace arguments. */ + /* Accumulate actual length, including quoting as necessary */ p = BUFF_FRONT (buff); + len = 0; for (exp = macro->exp.text;;) { struct block *b = (struct block *) exp; size_t arglen; + int argquote; + uchar *base; + uchar *in; - memcpy (p, b->text, b->text_len); - p += b->text_len; + len += b->text_len; + /* Copy the non-argument text literally, keeping + track of whether matching quotes have been seen. 
*/ + for (arglen = b->text_len, in = b->text; arglen > 0; arglen--) + { + if (*in == '"') + cxtquote ^= 1; + *p++ = *in++; + } + /* Done if no more arguments */ if (b->arg_index == 0) break; arglen = (fmacro->args[b->arg_index] - fmacro->args[b->arg_index - 1] - 1); - memcpy (p, pfile->out.base + fmacro->args[b->arg_index - 1], - arglen); - p += arglen; + base = pfile->out.base + fmacro->args[b->arg_index - 1]; + in = base; +#if 0 + /* Skip leading whitespace in the text for the argument to + be substituted. To be compatible with gcc 2.95, we would + also need to trim trailing whitespace. Gcc 2.95 trims + leading and trailing whitespace, which may be a bug. The + current gcc testsuite explicitly checks that this leading + and trailing whitespace in actual arguments is + preserved. */ + while (arglen > 0 && is_space (*in)) + { + in++; + arglen--; + } +#endif + for (argquote = 0; arglen > 0; arglen--) + { + if (cxtquote && *in == '"') + { + if (in > base && *(in-1) != '\\') + argquote ^= 1; + /* Always add backslash before double quote if argument + is expanded in a quoted context */ + *p++ = '\\'; + len++; + } + else if (cxtquote && argquote && *in == '\\') + { + /* Always add backslash before a backslash in an argument + that is expanded in a quoted context and also in the + range of a quoted context in the argument itself. */ + *p++ = '\\'; + len++; + } + *p++ = *in++; + len++; + } exp += BLOCK_LEN (b->text_len); }