tags 386912 fixed-upstream
thanks

Hello,

I tried revision 1.40 at:
http://cvs.savannah.gnu.org/viewcvs/gettext/gettext-tools/src/x-perl.c?rev=1.42&root=gettext&view=log

It fixes the bug. (Thanks a lot Bruno).

You may also want to apply 1.42, which adds some cleanup.

The patch for x-perl.c 1.40 is attached (it is quite long).

Kind Regards,
-- 
Nekral
--- ../orig/gettext-0.15/gettext-tools/src/x-perl.c     2006-05-08 
15:31:37.000000000 +0200
+++ gettext-0.15/gettext-tools/src/x-perl.c     2006-10-15 22:24:40.000000000 
+0200
@@ -760,7 +760,8 @@
 static token_ty *x_perl_lex (message_list_ty *mlp);
 static void x_perl_unlex (token_ty *tp);
 static bool extract_balanced (message_list_ty *mlp, int state,
-                             token_type_ty delim,
+                             token_type_ty delim, bool eat_delim,
+                             bool comma_delim,
                              flag_context_ty outer_context,
                              flag_context_list_iterator_ty context_iter,
                              int arg, struct arglist_parser *argparser);
@@ -1369,7 +1370,7 @@
               real_file_name, line_number);
 #endif
 
-      if (extract_balanced (mlp, 0, token_type_rbrace,
+      if (extract_balanced (mlp, 0, token_type_rbrace, true, false,
                            null_context, null_context_list_iterator,
                            1, arglist_parser_alloc (mlp, NULL)))
        return;
@@ -1560,7 +1561,7 @@
                else
                  {
                    x_perl_unlex (t1);
-                   if (extract_balanced (mlp, 1, token_type_rbrace,
+                   if (extract_balanced (mlp, 1, token_type_rbrace, true, 
false,
                                          null_context, context_iter,
                                          1, arglist_parser_alloc (mlp, 
&shapes)))
                      return;
@@ -1591,7 +1592,7 @@
          fprintf (stderr, "%s:%d: extracting balanced '{' after varname\n",
                   real_file_name, line_number);
 #endif
-         extract_balanced (mlp, 0, token_type_rbrace,
+         extract_balanced (mlp, 0, token_type_rbrace, true, false,
                            null_context, null_context_list_iterator,
                            1, arglist_parser_alloc (mlp, NULL));
          break;
@@ -1601,7 +1602,7 @@
          fprintf (stderr, "%s:%d: extracting balanced '[' after varname\n",
                   real_file_name, line_number);
 #endif
-         extract_balanced (mlp, 0, token_type_rbracket,
+         extract_balanced (mlp, 0, token_type_rbracket, true, false,
                            null_context, null_context_list_iterator,
                            1, arglist_parser_alloc (mlp, NULL));
          break;
@@ -2782,7 +2783,7 @@
 /* The file is broken into tokens.  Scan the token stream, looking for
    a keyword, followed by a left paren, followed by a string.  When we
    see this sequence, we have something to remember.  We assume we are
-   looking at a valid C or C++ program, and leave the complaints about
+   looking at a valid Perl program, and leave the complaints about
    the grammar to the compiler.
 
      Normal handling: Look for
@@ -2791,11 +2792,64 @@
        keyword ( ... msgid ... msgid_plural ... )
 
    We use recursion because the arguments before msgid or between msgid
-   and msgid_plural can contain subexpressions of the same form.  */
+   and msgid_plural can contain subexpressions of the same form.
+
+   In Perl, parentheses around function arguments can be omitted.
+
+   The general rules are:
+     1) Functions declared with a prototype take exactly the specified number
+        of arguments.
+          sub one_arg ($) { ... }
+          sub two_args ($$) { ... }
+     2) When a function name is immediately followed by an opening parenthesis,
+        the argument list ends at the corresponding closing parenthesis.
+
+   If rule 1 and rule 2 are contradictory, i.e. when the program calls a
+   function with an explicit argument list and the wrong number of arguments,
+   the program is invalid:
+     sub two_args ($$) { ... }
+     foo two_args (x), y             - invalid due to rules 1 and 2
+
+   Ambiguities are resolved as follows:
+     3) Some built-ins, such as 'abs', 'sqrt', 'sin', 'cos', ..., and functions
+        declared with a prototype of exactly one argument take exactly one
+        argument:
+          foo sin x, y  ==>  foo (sin (x), y)
+          sub one_arg ($) { ... }
+          foo one_arg x, y, z  ==>  foo (one_arg (x), y, z)
+     4) Other identifiers, if not immediately followed by an opening
+        parenthesis, consume the entire remaining argument list:
+          foo bar x, y  ==>  foo (bar (x, y))
+          sub two_args ($$) { ... }
+          foo two_args x, y  ==>  foo (two_args (x, y))
+
+   Other series of comma separated expressions without a function name at
+   the beginning are comma expressions:
+          sub two_args ($$) { ... }
+          foo two_args x, (y, z)  ==>  foo (two_args (x, (y, z)))
+   Note that the evaluation of comma expressions returns a list of values
+   when in list context (e.g. inside the argument list of a function without
+   prototype) but only one value when inside the argument list of a function
+   with a prototype:
+          sub print3 ($$$) { print @_ }
+          print3 5, (6, 7), 8  ==>  578
+          print 5, (6, 7), 8  ==>  5678
+
+   Where rule 3 or 4 contradict rule 1 or 2, the program is invalid:
+     sin (x, y)                      - invalid due to rules 2 and 3
+     sub one_arg ($) { ... }
+     one_arg (x, y)                  - invalid due to rules 2 and 3
+     sub two_args ($$) { ... }
+     foo two_args x, y, z            - invalid due to rules 1 and 4
+ */
 
 /* Extract messages until the next balanced closing parenthesis.
    Extracted messages are added to MLP.
 
+   DELIM can be either token_type_rbrace, token_type_rbracket,
+   token_type_rparen.  Additionally, if COMMA_DELIM is true, parsing
+   stops at the next comma outside parentheses.
+
    ARG is the current argument list position, starts with 1.
    ARGPARSER is the corresponding argument list parser.
 
@@ -2805,33 +2859,31 @@
 
    0 - initial state
    1 - keyword has been seen
-   2 - extractable string has been seen
-   3 - a dot operator after an extractable string has been seen
 
-   States 2 and 3 are "fragile", the parser will remain in state 2
-   as long as only opening parentheses are seen, a transition to
-   state 3 is done on appearance of a dot operator, all other tokens
+   States 1 is "fragile". The parser will remain in state 1
+   as long as only opening parentheses are seen. All other tokens
    will cause the parser to fall back to state 1 or 0, eventually
    with an error message about invalid intermixing of constant and
-   non-constant strings.
-
-   Likewise, state 3 is fragile.  The parser will remain in state 3
-   as long as only closing parentheses are seen, a transition to state
-   2 is done on appearance of another (literal!) string, all other
-   tokens will cause a warning.  */
+   non-constant strings.  */
 
 static bool
-extract_balanced (message_list_ty *mlp, int state, token_type_ty delim,
+extract_balanced (message_list_ty *mlp,
+                 int state,
+                 token_type_ty delim, bool eat_delim, bool comma_delim,
                  flag_context_ty outer_context,
                  flag_context_list_iterator_ty context_iter,
                  int arg, struct arglist_parser *argparser)
 {
-  /* Number of left parentheses seen.  */
-  int paren_seen = 0;
-
   /* Whether to implicitly assume the next tokens are arguments even without
      a '('.  */
   bool next_is_argument = false;
+  /* Parameters of the keyword just seen.  Defined only when next_is_argument
+     is true.  */
+  const struct callshapes *next_shapes = NULL;
+  struct arglist_parser *next_argparser = NULL;
+
+  /* Whether to not consider strings until the next comma.  */
+  bool skip_until_comma = false;
 
   /* Context iterator that will be used if the next token is a '('.  */
   flag_context_list_iterator_ty next_context_iter =
@@ -2865,23 +2917,84 @@
          xgettext_current_source_encoding = po_charset_utf8;
          arglist_parser_done (argparser, arg);
          xgettext_current_source_encoding = xgettext_global_source_encoding;
+         if (next_argparser != NULL)
+           free (next_argparser);
 #if DEBUG_PERL
          fprintf (stderr, "%s:%d: extract_balanced finished (%d)\n",
                   logical_file_name, tp->line_number, --nesting_level);
 #endif
-         free_token (tp);
+         if (eat_delim)
+           free_token (tp);
+         else
+           /* Preserve the delimiter for the caller.  */
+           x_perl_unlex (tp);
          return false;
        }
 
+      if (comma_delim && tp->type == token_type_comma)
+       {
+         xgettext_current_source_encoding = po_charset_utf8;
+         arglist_parser_done (argparser, arg);
+         xgettext_current_source_encoding = xgettext_global_source_encoding;
+         if (next_argparser != NULL)
+           free (next_argparser);
+#if DEBUG_PERL
+         fprintf (stderr, "%s:%d: extract_balanced finished at comma (%d)\n",
+                  logical_file_name, tp->line_number, --nesting_level);
+#endif
+         x_perl_unlex (tp);
+         return false;
+       }
+         
       if (next_is_argument && tp->type != token_type_lparen)
        {
          /* An argument list starts, even though there is no '('.  */
-         context_iter = next_context_iter;
-         outer_context = inner_context;
-         inner_context =
-           inherited_context (outer_context,
-                              flag_context_list_iterator_advance (
-                                &context_iter));
+         bool next_comma_delim;
+
+         x_perl_unlex (tp);
+
+         if (next_shapes != NULL)
+           /* We know something about the function being called.  Assume
+              that it consumes only one argument if no argument number or
+              total > 1 is specified.  */
+           {
+             size_t i;
+
+             next_comma_delim = true;
+             for (i = 0; i < next_shapes->nshapes; i++)
+               {
+                 const struct callshape *shape = &next_shapes->shapes[i];
+
+                 if (shape->argnum1 > 1
+                     || shape->argnum2 > 1
+                     || shape->argnumc > 1
+                     || shape->argtotal > 1)
+                   next_comma_delim = false;
+               }
+           }
+         else
+           /* We know nothing about the function being called.  It could be
+              a function prototyped to take only one argument, or on the other
+              hand it could be prototyped to take more than one argument or an
+              arbitrary argument list or it could be unprototyped.  Due to
+              the way the parser works, assuming the first case gives the
+              best results.  */
+           next_comma_delim = true;
+
+         if (extract_balanced (mlp, state, delim, false, next_comma_delim,
+                               inner_context, next_context_iter,
+                               1, next_argparser))
+           {
+             xgettext_current_source_encoding = po_charset_utf8;
+             arglist_parser_done (argparser, arg);
+             xgettext_current_source_encoding = 
xgettext_global_source_encoding;
+             return true;
+           }
+
+         next_is_argument = false;
+         next_argparser = NULL;
+         next_context_iter = null_context_list_iterator;
+         continue;
        }
 
       switch (tp->type)
@@ -2902,15 +3015,16 @@
                const struct callshapes *shapes =
                  (const struct callshapes *) keyword_value;
 
-               xgettext_current_source_encoding = po_charset_utf8;
-               arglist_parser_done (argparser, arg);
-               xgettext_current_source_encoding = 
xgettext_global_source_encoding;
-               argparser = arglist_parser_alloc (mlp, shapes);
-               arg = 1;
-
                last_token = token_type_keyword_symbol;
-
-               state = 2;
+               next_shapes = shapes;
+               next_argparser = arglist_parser_alloc (mlp, shapes);
+               state = 1;
+             }
+           else
+             {
+               next_shapes = NULL;
+               next_argparser = arglist_parser_alloc (mlp, NULL);
+               state = 0;
              }
          }
          next_is_argument = true;
@@ -2928,44 +3042,74 @@
 #endif
          prefer_division_over_regexp = true;
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = null_context_list_iterator;
          break;
 
        case token_type_lparen:
 #if DEBUG_PERL
-         fprintf (stderr, "%s:%d: type left parentheses (%d)\n",
+         fprintf (stderr, "%s:%d: type left parenthesis (%d)\n",
                   logical_file_name, tp->line_number, nesting_level);
 #endif
-         ++paren_seen;
-
-         if (extract_balanced (mlp, state, token_type_rparen,
-                               inner_context, next_context_iter,
-                               arg, arglist_parser_clone (argparser)))
+         if (next_is_argument)
            {
-             xgettext_current_source_encoding = po_charset_utf8;
-             arglist_parser_done (argparser, arg);
-             xgettext_current_source_encoding = 
xgettext_global_source_encoding;
-             free_token (tp);
-             return true;
+             /* Parse the argument list of a function call.  */
+             if (extract_balanced (mlp, state, token_type_rparen, true, false,
+                                   inner_context, next_context_iter,
+                                   1, next_argparser))
+               {
+                 xgettext_current_source_encoding = po_charset_utf8;
+                 arglist_parser_done (argparser, arg);
+                 xgettext_current_source_encoding = 
xgettext_global_source_encoding;
+                 return true;
+               }
+             next_is_argument = false;
+             next_argparser = NULL;
            }
-         if (my_last_token == token_type_keyword_symbol)
+         else
            {
-             xgettext_current_source_encoding = po_charset_utf8;
-             arglist_parser_done (argparser, arg);
-             xgettext_current_source_encoding = 
xgettext_global_source_encoding;
-             argparser = arglist_parser_alloc (mlp, NULL);
+             /* Parse a parenthesized expression or comma expression.  */
+             if (extract_balanced (mlp, state, token_type_rparen, true, false,
+                                   inner_context, next_context_iter,
+                                   arg, arglist_parser_clone (argparser)))
+               {
+                 xgettext_current_source_encoding = po_charset_utf8;
+                 arglist_parser_done (argparser, arg);
+                 xgettext_current_source_encoding = 
xgettext_global_source_encoding;
+                 if (next_argparser != NULL)
+                   free (next_argparser);
+                 free_token (tp);
+                 return true;
+               }
+             /* FIXME: Is this still needed?  */
+             if (my_last_token == token_type_keyword_symbol)
+               {
+                 xgettext_current_source_encoding = po_charset_utf8;
+                 arglist_parser_done (argparser, arg);
+                 xgettext_current_source_encoding = 
xgettext_global_source_encoding;
+                 argparser = arglist_parser_alloc (mlp, NULL);
+               }
+             next_is_argument = false;
+             if (next_argparser != NULL)
+               free (next_argparser);
+             next_argparser = NULL;
            }
-         next_is_argument = false;
+         skip_until_comma = true;
          next_context_iter = null_context_list_iterator;
          break;
 
        case token_type_rparen:
 #if DEBUG_PERL
-         fprintf (stderr, "%s:%d: type right parentheses(%d)\n",
+         fprintf (stderr, "%s:%d: type right parenthesis (%d)\n",
                   logical_file_name, tp->line_number, nesting_level);
 #endif
-         --paren_seen;
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
+         skip_until_comma = true;
          next_context_iter = null_context_list_iterator;
          break;
 
@@ -2994,6 +3138,10 @@
                               flag_context_list_iterator_advance (
                                 &context_iter));
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
+         skip_until_comma = false;
          next_context_iter = passthrough_context_list_iterator;
          break;
 
@@ -3015,16 +3163,36 @@
              remember_a_message (mlp, NULL, string, inner_context, &pos, 
savable_comment);
              xgettext_current_source_encoding = 
xgettext_global_source_encoding;
            }
-         else if (state)
+         else if (/* state != 0 && */ !skip_until_comma)
            {
-             char *string = collect_message (mlp, tp, EXIT_FAILURE);
+             /* Need to collect the complete string, with error checking,
+                only if the argument ARG is used in ARGPARSER.  */
+             bool must_collect = false;
+             {
+               size_t nalternatives = argparser->nalternatives;
+               size_t i;
 
-             xgettext_current_source_encoding = po_charset_utf8;
-             arglist_parser_remember (argparser, arg,
-                                      string, inner_context,
-                                      logical_file_name, tp->line_number,
-                                      savable_comment);
-             xgettext_current_source_encoding = 
xgettext_global_source_encoding;
+               for (i = 0; i < nalternatives; i++)
+                 {
+                   struct partial_call *cp = &argparser->alternative[i];
+
+                   if (arg == cp->argnumc
+                       || arg == cp->argnum1 || arg == cp->argnum2)
+                     must_collect = true;
+                 }
+             }
+
+             if (must_collect)
+               {
+                 char *string = collect_message (mlp, tp, EXIT_FAILURE);
+
+                 xgettext_current_source_encoding = po_charset_utf8;
+                 arglist_parser_remember (argparser, arg,
+                                          string, inner_context,
+                                          logical_file_name, tp->line_number,
+                                          savable_comment);
+                 xgettext_current_source_encoding = 
xgettext_global_source_encoding;
+               }
            }
 
          if (arglist_parser_decidedp (argparser, arg))
@@ -3037,6 +3205,9 @@
            }
 
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = null_context_list_iterator;
          break;
 
@@ -3048,6 +3219,9 @@
          xgettext_current_source_encoding = po_charset_utf8;
          arglist_parser_done (argparser, arg);
          xgettext_current_source_encoding = xgettext_global_source_encoding;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          free_token (tp);
          return true;
 
@@ -3056,17 +3230,22 @@
          fprintf (stderr, "%s:%d: type lbrace (%d)\n",
                   logical_file_name, tp->line_number, nesting_level);
 #endif
-         if (extract_balanced (mlp, 0, token_type_rbrace,
+         if (extract_balanced (mlp, 0, token_type_rbrace, true, false,
                                null_context, null_context_list_iterator,
                                1, arglist_parser_alloc (mlp, NULL)))
            {
              xgettext_current_source_encoding = po_charset_utf8;
              arglist_parser_done (argparser, arg);
              xgettext_current_source_encoding = 
xgettext_global_source_encoding;
+             if (next_argparser != NULL)
+               free (next_argparser);
              free_token (tp);
              return true;
            }
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = null_context_list_iterator;
          break;
 
@@ -3076,6 +3255,9 @@
                   logical_file_name, tp->line_number, nesting_level);
 #endif
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = null_context_list_iterator;
          state = 0;
          break;
@@ -3085,17 +3267,22 @@
          fprintf (stderr, "%s:%d: type lbracket (%d)\n",
                   logical_file_name, tp->line_number, nesting_level);
 #endif
-         if (extract_balanced (mlp, 0, token_type_rbracket,
+         if (extract_balanced (mlp, 0, token_type_rbracket, true, false,
                                null_context, null_context_list_iterator,
                                1, arglist_parser_alloc (mlp, NULL)))
            {
              xgettext_current_source_encoding = po_charset_utf8;
              arglist_parser_done (argparser, arg);
              xgettext_current_source_encoding = 
xgettext_global_source_encoding;
+             if (next_argparser != NULL)
+               free (next_argparser);
              free_token (tp);
              return true;
            }
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = null_context_list_iterator;
          break;
 
@@ -3105,6 +3292,9 @@
                   logical_file_name, tp->line_number, nesting_level);
 #endif
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = null_context_list_iterator;
          state = 0;
          break;
@@ -3128,6 +3318,9 @@
          outer_context = null_context;
          context_iter = null_context_list_iterator;
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = passthrough_context_list_iterator;
          inner_context =
            inherited_context (outer_context,
@@ -3141,6 +3334,9 @@
                   logical_file_name, tp->line_number, nesting_level);
 #endif
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = null_context_list_iterator;
          break;
 
@@ -3150,6 +3346,9 @@
                   logical_file_name, tp->line_number, nesting_level);
 #endif
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = null_context_list_iterator;
          state = 0;
          break;
@@ -3161,6 +3360,9 @@
                   tp->string);
 #endif
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = null_context_list_iterator;
          state = 0;
          break;
@@ -3171,6 +3373,9 @@
                   logical_file_name, tp->line_number, nesting_level);
 #endif
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = null_context_list_iterator;
          break;
 
@@ -3180,6 +3385,9 @@
                   logical_file_name, tp->line_number, nesting_level);
 #endif
          next_is_argument = false;
+         if (next_argparser != NULL)
+           free (next_argparser);
+         next_argparser = NULL;
          next_context_iter = null_context_list_iterator;
          state = 0;
          break;
@@ -3223,7 +3431,7 @@
 
   /* Eat tokens until eof is seen.  When extract_balanced returns
      due to an unbalanced closing brace, just restart it.  */
-  while (!extract_balanced (mlp, 0, token_type_rbrace,
+  while (!extract_balanced (mlp, 0, token_type_rbrace, true, false,
                            null_context, null_context_list_iterator,
                            1, arglist_parser_alloc (mlp, NULL)))
     ;

Reply via email to