Given the following test case:

  #define f(x, y) "x y"
  
  extern void abort (void);
  
  int main ()
  {
    const char *str1 = f("a", "\"a\"");
    const char *str2 = f( \t, " \t");
  
    if (strcmp (str1, "\"a\" \"\\\"a\\\"\""))
      abort ();
    if (strcmp (str2, "\t \" \\t\""))
      abort ();
    return 0;
  }

Gcc 2.95.3 will accept it and do the right thing:

  $ gcc -v
  Reading specs from /opt/local/fsf/lib/gcc-lib/i686-pc-linux-gnu/2.95.3/specs
  gcc version 2.95.3 20010315 (release)
  $ gcc -traditional-cpp -o macroargs macroargs.c
  $ ./macroargs
  $

Current gcc 4.X fails to accept this code.  For example, using the Fedora Core 
6 compiler:

  $ /usr/bin/gcc -v
  Using built-in specs.
  Target: i386-redhat-linux
  Configured with: ../configure --prefix=/usr --mandir=/usr/share/man 
--infodir=/usr/share/info --enable-shared --enable-threads=posix 
--enable-checking=release --with-system-zlib --enable-__cxa_atexit 
--disable-libunwind-exceptions --enable-libgcj-multifile 
--enable-languages=c,c++,objc,obj-c++,java,fortran,ada --enable-java-awt=gtk 
--disable-dssi --enable-plugin 
--with-java-home=/usr/lib/jvm/java-1.4.2-gcj-1.4.2.0/jre --with-cpu=generic 
--host=i386-redhat-linux
  Thread model: posix
  gcc version 4.1.1 20061011 (Red Hat 4.1.1-30)
  $ /usr/bin/gcc -traditional-cpp -o macroargs macroargs.c
  macroargs.c: In function ‘main’:
  macroargs.c:7: error: expected ‘,’ or ‘;’ before ‘a’
  macroargs.c:7: error: stray ‘\’ in program
  macroargs.c:7: error: missing terminating " character
  macroargs.c:8: error: stray ‘\’ in program
  macroargs.c:12: error: ‘str2’ undeclared (first use in this function)
  macroargs.c:12: error: (Each undeclared identifier is reported only once
  macroargs.c:12: error: for each function it appears in.)

The attached patch (also posted to gcc-patches) will fix it, although
its handling of quoted arguments is not exactly identical to that of
gcc 2.95.3.  Notice the difference between the test case above and the
test case included with the patch.

Here is an example of a patched gcc's behavior:

  $ /opt/specifix/experimental/bin/gcc -v
  Using built-in specs.
  Target: i686-pc-linux-gnu
  Configured with: /src/specifix/experimental/src/gcc/configure 
--enable-languages=c,c++,fortran --prefix=/opt/specifix/experimental 
--disable-werror --disable-bootstrap --with-mpfr=/opt/specifix/experimental 
--with-gmp=/opt/specifix/experimental --cache-file=/dev/null 
--srcdir=/src/specifix/experimental/src/gcc
  Thread model: posix
  gcc version 4.3.0 20061231 (experimental)
  $ /opt/specifix/experimental/bin/gcc -traditional-cpp -o macroargs macroargs.c
  $

Note, however, that the above test case WILL abort if run when built
with the patched gcc, while the test case included with the patch will
not, due to the handling of leading and trailing whitespace in macro
args.  See the code inside "#if 0 ... #endif" in the patch and the
associated comments.  Either the current gcc testsuite is wrong in how
it tests for this whitespace, or gcc 2.95.3 is wrong.  It's not clear
to me which behavior is more correct.

-Fred


Index: gcc/gcc/testsuite/ChangeLog.specifix
===================================================================
RCS file: gcc/gcc/testsuite/ChangeLog.specifix
diff -N gcc/gcc/testsuite/ChangeLog.specifix
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/gcc/testsuite/ChangeLog.specifix	13 Aug 2006 20:59:50 -0000
@@ -0,0 +1,4 @@
+2006-08-11  Fred Fish  <[EMAIL PROTECTED]>
+
+	* gcc.dg/cpp/trad/macroargs.c: Add code to test quoting in
+	macro expansions.
Index: gcc/gcc/testsuite/gcc.dg/cpp/trad/macroargs.c
===================================================================
RCS file: /cvsroots/latest/src/gcc/gcc/testsuite/gcc.dg/cpp/trad/macroargs.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 macroargs.c
--- gcc/gcc/testsuite/gcc.dg/cpp/trad/macroargs.c	8 Oct 2005 18:59:16 -0000	1.1.1.1
+++ gcc/gcc/testsuite/gcc.dg/cpp/trad/macroargs.c	13 Aug 2006 20:58:12 -0000
@@ -8,6 +8,17 @@
 
 extern void abort (void);
 
+void testquoting ()
+{
+  const char *str1 = f("a", "\"a\"");
+  const char *str2 = f( \t, " \t");
+
+  if (strcmp (str1, "\"a\"  \"\\\"a\\\"\""))
+    abort ();
+  if (strcmp (str2, " \t  \" \\t\""))
+    abort ();
+}
+
 int main ()
 {
   const char *str1 = f( foo ,bar);
@@ -26,5 +37,7 @@ foo
 , 2"), "1 , 2"))	
     abort ();
 
+  testquoting ();
+
   return 0;
 }
Index: gcc/libcpp/ChangeLog.specifix
===================================================================
RCS file: gcc/libcpp/ChangeLog.specifix
diff -N gcc/libcpp/ChangeLog.specifix
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ gcc/libcpp/ChangeLog.specifix	13 Aug 2006 20:59:01 -0000
@@ -0,0 +1,7 @@
+2006-08-09  Fred Fish  <[EMAIL PROTECTED]>
+
+	* traditional.c (replace_args_and_push): Add local variable
+	cxtquote, calculate the replacement text size assuming a 
+	worst case of every input character quoted with backslash,
+	and properly handle output quoting of quote characters in
+	actual arguments used in function-like macros.
Index: gcc/libcpp/traditional.c
===================================================================
RCS file: /cvsroots/latest/src/gcc/libcpp/traditional.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 traditional.c
--- gcc/libcpp/traditional.c	8 Oct 2005 19:00:54 -0000	1.1.1.1
+++ gcc/libcpp/traditional.c	13 Aug 2006 20:58:12 -0000
@@ -827,8 +827,11 @@ replace_args_and_push (cpp_reader *pfile
       uchar *p;
       _cpp_buff *buff;
       size_t len = 0;
+      int cxtquote = 0;
 
-      /* Calculate the length of the argument-replaced text.  */
+      /* Get an estimate of the length of the argument-replaced text.
+	 This is a worst case estimate, assuming that every replacement
+	 text character needs quoting.  */
       for (exp = macro->exp.text;;)
 	{
 	  struct block *b = (struct block *) exp;
@@ -836,7 +839,7 @@ replace_args_and_push (cpp_reader *pfile
 	  len += b->text_len;
 	  if (b->arg_index == 0)
 	    break;
-	  len += (fmacro->args[b->arg_index]
+	  len += 2 * (fmacro->args[b->arg_index]
 		  - fmacro->args[b->arg_index - 1] - 1);
 	  exp += BLOCK_LEN (b->text_len);
 	}
@@ -845,21 +848,69 @@ replace_args_and_push (cpp_reader *pfile
       buff = _cpp_get_buff (pfile, len + 1);
 
       /* Copy the expansion and replace arguments.  */
+      /* Accumulate actual length, including quoting as necessary */
       p = BUFF_FRONT (buff);
+      len = 0;
       for (exp = macro->exp.text;;)
 	{
 	  struct block *b = (struct block *) exp;
 	  size_t arglen;
+	  int argquote;
+	  uchar *base;
+	  uchar *in;
 
-	  memcpy (p, b->text, b->text_len);
-	  p += b->text_len;
+	  len += b->text_len;
+	  /* Copy the non-argument text literally, keeping
+	     track of whether matching quotes have been seen. */
+	  for (arglen = b->text_len, in = b->text; arglen > 0; arglen--)
+	    {
+	      if (*in == '"')
+		cxtquote ^= 1;
+	      *p++ = *in++;
+	    }
+	  /* Done if no more arguments */
 	  if (b->arg_index == 0)
 	    break;
 	  arglen = (fmacro->args[b->arg_index]
 		    - fmacro->args[b->arg_index - 1] - 1);
-	  memcpy (p, pfile->out.base + fmacro->args[b->arg_index - 1],
-		  arglen);
-	  p += arglen;
+	  base = pfile->out.base + fmacro->args[b->arg_index - 1];
+	  in = base;
+#if 0
+	  /* Skip leading whitespace in the text for the argument to
+	     be substituted. To be compatible with gcc 2.95, we would
+	     also need to trim trailing whitespace. Gcc 2.95 trims
+	     leading and trailing whitespace, which may be a bug.  The
+	     current gcc testsuite explicitly checks that this leading
+	     and trailing whitespace in actual arguments is
+	     preserved. */
+	  while (arglen > 0 && is_space (*in))
+	    {
+	      in++;
+	      arglen--;
+	    }
+#endif
+	  for (argquote = 0; arglen > 0; arglen--)
+	    {
+	      if (cxtquote && *in == '"')
+		{
+		  if (in > base && *(in-1) != '\\')
+		    argquote ^= 1;
+		  /* Always add backslash before double quote if argument
+		     is expanded in a quoted context */
+		  *p++ = '\\';
+		  len++;
+		}
+	      else if (cxtquote && argquote && *in == '\\')
+		{
+		  /* Always add backslash before a backslash in an argument
+		     that is expanded in a quoted context and also in the
+		     range of a quoted context in the argument itself. */
+		  *p++ = '\\';
+		  len++;
+		}
+	      *p++ = *in++;
+	      len++;
+	    }
 	  exp += BLOCK_LEN (b->text_len);
 	}
 

Reply via email to