This adds the ability to defer the validation of numeric attribute
arguments until the sequence is parsed if the attribute being
handled is one known to be 'clang form'.

We do this by considering the arguments to be strings regardless
of content and defer the interpretation of those strings until the
argument processing.

Since the C++ front end lexes tokens separately (and would report
non-compliant numeric tokens before we can intercept them), we need
to implement a small state machine to track the lexing of attributes.

        PR c++/109877

gcc/cp/ChangeLog:

        * parser.cc (enum clang_attr_state): New.
        (cp_lexer_attribute_state): New.
        (cp_lexer_new_main): Set initial attribute lexing state.
        (cp_lexer_get_preprocessor_token): Handle lexing of clang-
        form attributes.
        (cp_parser_clang_attribute): Handle clang-form attributes.
        (cp_parser_gnu_attribute_list): Switch to clang-form parsing
        where needed.
        * parser.h : New flag to signal lexing clang-form attributes.

Signed-off-by: Iain Sandoe <i...@sandoe.co.uk>
---
 gcc/cp/parser.cc | 230 +++++++++++++++++++++++++++++++++++++++++++++--
 gcc/cp/parser.h  |   3 +
 2 files changed, 227 insertions(+), 6 deletions(-)

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 5116bcb78f6..c12f473f2e3 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -699,6 +699,91 @@ cp_lexer_handle_early_pragma (cp_lexer *lexer)
 static cp_parser *cp_parser_new (cp_lexer *);
 static GTY (()) cp_parser *the_parser;
 
+/* Context-sensitive parse-checking for clang-style attributes.  */
+
+enum clang_attr_state {
+  CA_NONE = 0,
+  CA_ATTR,
+  CA_BR1, CA_BR2,
+  CA_LIST,
+  CA_LIST_ARGS,
+  CA_IS_CA,
+  CA_CA_ARGS,
+  CA_LIST_CONT
+};
+
+/* State machine tracking context of attribute lexing.  */
+
+static enum clang_attr_state
+cp_lexer_attribute_state (cp_token& token, enum clang_attr_state attr_state)
+{
+  /* Implement a context-sensitive parser for clang attributes.
+     We detect __attribute__((clang_style_attribute (ARGS))) and lex the
+     args ARGS with the following differences from GNU attributes:
+       (a) number-like values are lexed as strings [this allows lexing XX.YY.ZZ
+          version numbers].
+       (b) we concatenate strings, since clang attributes allow this too.  */
+  switch (attr_state)
+    {
+    case CA_NONE:
+      if (token.type == CPP_KEYWORD
+         && token.keyword == RID_ATTRIBUTE)
+       attr_state = CA_ATTR;
+      break;
+    case CA_ATTR:
+      if (token.type == CPP_OPEN_PAREN)
+       attr_state = CA_BR1;
+      else
+       attr_state = CA_NONE;
+      break;
+    case CA_BR1:
+      if (token.type == CPP_OPEN_PAREN)
+       attr_state = CA_BR2;
+      else
+       attr_state = CA_NONE;
+      break;
+    case CA_BR2:
+      if (token.type == CPP_NAME)
+       {
+         tree identifier = (token.type == CPP_KEYWORD)
+           /* For keywords, use the canonical spelling, not the
+              parsed identifier.  */
+           ? ridpointers[(int) token.keyword]
+           : token.u.value;
+         identifier = canonicalize_attr_name (identifier);
+         if (attribute_clang_form_p (identifier))
+           attr_state = CA_IS_CA;
+         else
+           attr_state = CA_LIST;
+       }
+      else
+       attr_state = CA_NONE;
+      break;
+    case CA_IS_CA:
+    case CA_LIST:
+      if (token.type == CPP_COMMA)
+       attr_state = CA_BR2; /* Back to the list outer.  */
+      else if (token.type == CPP_OPEN_PAREN)
+       attr_state = attr_state == CA_IS_CA ? CA_CA_ARGS
+                                           : CA_LIST_ARGS;
+      else
+       attr_state = CA_NONE;
+      break;
+    case CA_CA_ARGS: /* We will special-case args in this state.  */
+    case CA_LIST_ARGS:
+      if (token.type == CPP_CLOSE_PAREN)
+       attr_state = CA_LIST_CONT;
+      break;
+    case CA_LIST_CONT:
+      if (token.type == CPP_COMMA)
+       attr_state = CA_BR2; /* Back to the list outer.  */
+      else
+       attr_state = CA_NONE;
+      break;
+    }
+  return attr_state;
+}
+
 /* Create a new main C++ lexer, the lexer that gets tokens from the
    preprocessor, and also create the main parser.  */
 
@@ -715,6 +800,9 @@ cp_lexer_new_main (void)
   c_common_no_more_pch ();
 
   cp_lexer *lexer = cp_lexer_alloc ();
+  lexer->lex_number_as_string_p = false;
+  enum clang_attr_state attr_state = CA_NONE;
+
   /* Put the first token in the buffer.  */
   cp_token *tok = lexer->buffer->quick_push (token);
 
@@ -738,8 +826,20 @@ cp_lexer_new_main (void)
       if (tok->type == CPP_PRAGMA_EOL)
        cp_lexer_handle_early_pragma (lexer);
 
+      attr_state = cp_lexer_attribute_state (*tok, attr_state);
       tok = vec_safe_push (lexer->buffer, cp_token ());
-      cp_lexer_get_preprocessor_token (C_LEX_STRING_NO_JOIN, tok);
+      unsigned int flags = C_LEX_STRING_NO_JOIN;
+      if (attr_state == CA_CA_ARGS)
+       {
+         /* We are handling a clang attribute;
+            1. do not try to diagnose x.y.z as a bad number, it could be a
+               version.
+            2. join strings.  */
+          flags = C_LEX_NUMBER_AS_STRING;
+         lexer->lex_number_as_string_p = true;
+       }
+      cp_lexer_get_preprocessor_token (flags, tok);
+      lexer->lex_number_as_string_p = false;
     }
 
   lexer->next_token = lexer->buffer->address ();
@@ -956,7 +1056,15 @@ cp_lexer_get_preprocessor_token (unsigned flags, cp_token 
*token)
 {
   static int is_extern_c = 0;
 
-   /* Get a new token from the preprocessor.  */
+   /* Get a new token from the preprocessor.
+  int lex_flags = 0;
+  if (lexer != NULL)
+    {
+      lex_flags = C_LEX_STRING_NO_JOIN;
+      if (lexer->lex_number_as_string_p)
+       lex_flags |= C_LEX_NUMBER_AS_STRING;
+    }
+  */
   token->type
     = c_lex_with_flags (&token->u.value, &token->location, &token->flags,
                        flags);
@@ -29302,6 +29410,113 @@ cp_parser_gnu_attributes_opt (cp_parser* parser)
   return attributes;
 }
 
+/* Parse the arguments list for a clang attribute.   */
+static tree
+cp_parser_clang_attribute (cp_parser *parser, tree/*attr_id*/)
+{
+  /* Each entry can be :
+     identifier
+     identifier=N.MM.Z
+     identifier="string"
+     followed by ',' or ) for the last entry*/
+
+  matching_parens parens;
+  if (!parens.require_open (parser))
+    return NULL;
+
+  bool save_translate_strings_p = parser->translate_strings_p;
+  parser->translate_strings_p = false;
+  tree attr_args = NULL_TREE;
+  cp_token *token = cp_lexer_peek_token (parser->lexer);
+  if (token->type == CPP_NAME
+      && cp_lexer_peek_nth_token (parser->lexer, 2)->type == CPP_COMMA)
+    {
+      tree platf = token->u.value;
+      cp_lexer_consume_token (parser->lexer);
+      attr_args = tree_cons (NULL_TREE, platf, NULL_TREE);
+    }
+  else
+    {
+      error_at (token->location,
+               "expected a platform name followed by %<,%>");
+      cp_parser_skip_to_closing_parenthesis (parser,
+                                            /*recovering=*/true,
+                                            /*or_comma=*/false,
+                                            /*consume_paren=*/true);
+      parser->translate_strings_p = save_translate_strings_p;
+      return error_mark_node;
+    }
+
+  cp_lexer_consume_token (parser->lexer); /* consume the ',' */
+  do
+    {
+      tree name = NULL_TREE;
+      tree value = NULL_TREE;
+
+      token = cp_lexer_peek_token (parser->lexer);
+      if (token->type == CPP_NAME)
+       {
+         name = token->u.value;
+         cp_lexer_consume_token (parser->lexer);
+       }
+      else
+       {
+         /* FIXME: context-sensitive for that attrib.  */
+         error_at (token->location, "expected an attribute keyword");
+         cp_parser_skip_to_closing_parenthesis (parser,
+                                                /*recovering=*/true,
+                                                /*or_comma=*/false,
+                                                /*consume_paren=*/true);
+         attr_args = error_mark_node;
+         break;
+       }
+
+      if (cp_lexer_next_token_is (parser->lexer, CPP_EQ))
+       {
+         cp_lexer_consume_token (parser->lexer); /* eat the '=' */
+         token = cp_lexer_peek_token (parser->lexer);
+         if (cp_lexer_next_token_is_not (parser->lexer, CPP_CLOSE_PAREN)
+             && cp_lexer_next_token_is_not (parser->lexer, CPP_COMMA))
+           {
+             value = token->u.value;
+             /* ???: check for error mark and early-return?  */
+             cp_lexer_consume_token (parser->lexer);
+           }
+         else
+           {
+             error_at (token->location, "expected a value");
+             cp_parser_skip_to_closing_parenthesis (parser,
+                                                    /*recovering=*/true,
+                                                    /*or_comma=*/false,
+                                                    /*consume_paren=*/true);
+             attr_args = error_mark_node;
+             break;
+           }
+       }
+      else if (cp_lexer_next_token_is_not (parser->lexer, CPP_CLOSE_PAREN)
+              && cp_lexer_next_token_is_not (parser->lexer, CPP_COMMA))
+       {
+         error_at (token->location, "expected %<,%>, %<=%> or %<)%>");
+         cp_parser_skip_to_closing_parenthesis (parser,
+                                                /*recovering=*/true,
+                                                /*or_comma=*/false,
+                                                /*consume_paren=*/true);
+         attr_args = error_mark_node;
+         break;
+       }
+    if (cp_lexer_next_token_is (parser->lexer, CPP_COMMA))
+      cp_lexer_consume_token (parser->lexer);
+    tree t = tree_cons (value, name, NULL_TREE);
+    attr_args = chainon (attr_args, t);
+  } while (cp_lexer_next_token_is_not (parser->lexer, CPP_CLOSE_PAREN));
+
+  parser->translate_strings_p = save_translate_strings_p;
+  if (!parens.require_close (parser))
+    return error_mark_node;
+
+  return attr_args;
+}
+
 /* Parse a GNU attribute-list.
 
    attribute-list:
@@ -29361,9 +29576,12 @@ cp_parser_gnu_attribute_list (cp_parser* parser, bool 
exactly_one /* = false */)
 
          /* Peek at the next token.  */
          token = cp_lexer_peek_token (parser->lexer);
-         /* If it's an `(', then parse the attribute arguments.  */
-         if (token->type == CPP_OPEN_PAREN)
+         if (token->type == CPP_OPEN_PAREN
+             && attribute_clang_form_p (identifier))
+           arguments = cp_parser_clang_attribute (parser, identifier);
+         else if (token->type == CPP_OPEN_PAREN)
            {
+             /* If it's an `(', then parse the attribute arguments.  */
              vec<tree, va_gc> *vec;
              int attr_flag = (attribute_takes_identifier_p (identifier)
                               ? id_attr : normal_attr);
@@ -29380,12 +29598,12 @@ cp_parser_gnu_attribute_list (cp_parser* parser, bool 
exactly_one /* = false */)
                  arguments = build_tree_list_vec (vec);
                  release_tree_vector (vec);
                }
-             /* Save the arguments away.  */
-             TREE_VALUE (attribute) = arguments;
            }
 
          if (arguments != error_mark_node)
            {
+             /* Save the arguments away.  */
+             TREE_VALUE (attribute) = arguments;
              /* Add this attribute to the list.  */
              TREE_CHAIN (attribute) = attribute_list;
              attribute_list = attribute;
diff --git a/gcc/cp/parser.h b/gcc/cp/parser.h
index ee3afbaae04..e75c7abac5e 100644
--- a/gcc/cp/parser.h
+++ b/gcc/cp/parser.h
@@ -125,6 +125,9 @@ struct GTY (()) cp_lexer {
   /* True for in_omp_attribute_pragma lexer that should be destroyed
      when it is no longer in use.  */
   bool orphan_p;
+
+  /* TRUE if we want to parse number-like tokens as a string cst.  */
+  bool lex_number_as_string_p;
 };
 
 
-- 
2.39.2 (Apple Git-143)

Reply via email to