On Thu, Sep 10, 2015 at 10:28 PM, David Malcolm <dmalc...@redhat.com> wrote:
> This patch adds source *range* information to libcpp's cpp_token, and to
> c_token and cp_token in the C and C++ frontends respectively.
>
> To minimize churn, I kept the existing location_t fields, though in
> theory these are always just equal to the start of the source range.
>
> cpplib.h's struct cpp_token had this comment:
>
>   /* A preprocessing token.  This has been carefully packed and should
>      occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts.  */
>
> Does anyone know why this was "carefully packed" and to what extent
> this matters?  I'm adding an extra 8 bytes to it (or 4 if we eliminate
> the existing location_t).  As far as I can see, these are
> short-lived, and there are only relative few alive at any time.
> Or is it about making them fast to copy?
>
> gcc/c-family/ChangeLog:
>         * c-lex.c (c_lex_with_flags): Add "range" param, and write back
>         to *range with the range of the libcpp token.
>         * c-pragma.h (c_lex_with_flags): Add "range" param.
>
> gcc/c/ChangeLog:
>         * c-parser.c (struct c_token): Add "range" field.
>         (c_lex_one_token): Write back to token->range in call to
>         c_lex_with_flags.
>
> gcc/cp/ChangeLog:
>         * parser.c (eof_token): Add "range" field to initializer.
>         (cp_lexer_get_preprocessor_token): Write back to token->range in
>         call to c_lex_with_flags.
>         * parser.h (struct cp_token): Add "range" field.
>
> libcpp/ChangeLog:
>         * include/cpplib.h (struct cpp_token): Add src_range field.
>         * lex.c (_cpp_lex_direct): Set up the src_range on the token.
> ---
>  gcc/c-family/c-lex.c    | 7 +++++--
>  gcc/c-family/c-pragma.h | 4 ++--
>  gcc/c/c-parser.c        | 6 +++++-
>  gcc/cp/parser.c         | 5 +++--
>  gcc/cp/parser.h         | 2 ++
>  libcpp/include/cpplib.h | 4 +++-
>  libcpp/lex.c            | 8 ++++++++
>  7 files changed, 28 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c
> index 55ceb20..1334994 100644
> --- a/gcc/c-family/c-lex.c
> +++ b/gcc/c-family/c-lex.c
> @@ -380,11 +380,13 @@ c_common_has_attribute (cpp_reader *pfile)
>  }
>
>  /* Read a token and return its type.  Fill *VALUE with its value, if
> -   applicable.  Fill *CPP_FLAGS with the token's flags, if it is
> +   applicable.  Fill *LOC and *RANGE with the source location and range
> +   of the token.  Fill *CPP_FLAGS with the token's flags, if it is
>     non-NULL.  */
>
>  enum cpp_ttype
> -c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
> +c_lex_with_flags (tree *value, location_t *loc, source_range *range,
> +                 unsigned char *cpp_flags,
>                   int lex_flags)
>  {
>    static bool no_more_pch;
> @@ -397,6 +399,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned 
> char *cpp_flags,
>   retry:
>    tok = cpp_get_token_with_location (parse_in, loc);
>    type = tok->type;
> +  *range = tok->src_range;
>
>   retry_after_at:
>    switch (type)
> diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h
> index aa2b471..05df543 100644
> --- a/gcc/c-family/c-pragma.h
> +++ b/gcc/c-family/c-pragma.h
> @@ -225,8 +225,8 @@ extern enum cpp_ttype pragma_lex (tree *);
>  /* This is not actually available to pragma parsers.  It's merely a
>     convenient location to declare this function for c-lex, after
>     having enum cpp_ttype declared.  */
> -extern enum cpp_ttype c_lex_with_flags (tree *, location_t *, unsigned char 
> *,
> -                                       int);
> +extern enum cpp_ttype c_lex_with_flags (tree *, location_t *, source_range *,
> +                                       unsigned char *, int);
>
>  extern void c_pp_lookup_pragma (unsigned int, const char **, const char **);
>
> diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
> index 11a2b0f..5d822ee 100644
> --- a/gcc/c/c-parser.c
> +++ b/gcc/c/c-parser.c
> @@ -170,6 +170,8 @@ struct GTY (()) c_token {
>    ENUM_BITFIELD (pragma_kind) pragma_kind : 8;
>    /* The location at which this token was found.  */
>    location_t location;
> +  /* The source range at which this token was found.  */
> +  source_range range;
>    /* The value associated with this token, if any.  */
>    tree value;
>  };
> @@ -239,7 +241,9 @@ c_lex_one_token (c_parser *parser, c_token *token)
>  {
>    timevar_push (TV_LEX);
>
> -  token->type = c_lex_with_flags (&token->value, &token->location, NULL,
> +  token->type = c_lex_with_flags (&token->value, &token->location,
> +                                 &token->range,
> +                                 NULL,
>                                   (parser->lex_untranslated_string
>                                    ? C_LEX_STRING_NO_TRANSLATE : 0));
>    token->id_kind = C_ID_NONE;
> diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
> index 67fbcda..7c59c58 100644
> --- a/gcc/cp/parser.c
> +++ b/gcc/cp/parser.c
> @@ -58,7 +58,7 @@ along with GCC; see the file COPYING3.  If not see
>
>  static cp_token eof_token =
>  {
> -  CPP_EOF, RID_MAX, 0, PRAGMA_NONE, false, false, false, 0, { NULL }
> +  CPP_EOF, RID_MAX, 0, PRAGMA_NONE, false, false, false, 0, {0, 0}, { NULL }
>  };
>
>  /* The various kinds of non integral constant we encounter. */
> @@ -764,7 +764,8 @@ cp_lexer_get_preprocessor_token (cp_lexer *lexer, 
> cp_token *token)
>
>     /* Get a new token from the preprocessor.  */
>    token->type
> -    = c_lex_with_flags (&token->u.value, &token->location, &token->flags,
> +    = c_lex_with_flags (&token->u.value, &token->location,
> +                        &token->range, &token->flags,
>                         lexer == NULL ? 0 : C_LEX_STRING_NO_JOIN);
>    token->keyword = RID_MAX;
>    token->pragma_kind = PRAGMA_NONE;
> diff --git a/gcc/cp/parser.h b/gcc/cp/parser.h
> index 760467c..c7558a0 100644
> --- a/gcc/cp/parser.h
> +++ b/gcc/cp/parser.h
> @@ -61,6 +61,8 @@ struct GTY (()) cp_token {
>    BOOL_BITFIELD purged_p : 1;
>    /* The location at which this token was found.  */
>    location_t location;
> +  /* The source range at which this token was found.  */
> +  source_range range;

Is it just me or does location now feel somewhat redundant with range?  Can't we
compress that somehow?

>    /* The value associated with this token, if any.  */
>    union cp_token_value {
>      /* Used for CPP_NESTED_NAME_SPECIFIER and CPP_TEMPLATE_ID.  */
> diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
> index a2bdfa0..0b1a403 100644
> --- a/libcpp/include/cpplib.h
> +++ b/libcpp/include/cpplib.h
> @@ -235,9 +235,11 @@ struct GTY(()) cpp_identifier {
>  };
>
>  /* A preprocessing token.  This has been carefully packed and should
> -   occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts.  */
> +   occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts.
> +   FIXME: the above comment is no longer true with this patch.  */
>  struct GTY(()) cpp_token {
>    source_location src_loc;     /* Location of first char of token.  */
> +  source_range src_range;      /* Source range covered by the token.  */
>    ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT;  /* token type */
>    unsigned short flags;                /* flags - see above */
>
> diff --git a/libcpp/lex.c b/libcpp/lex.c
> index 0aa1090..a84a8c0 100644
> --- a/libcpp/lex.c
> +++ b/libcpp/lex.c
> @@ -2365,6 +2365,9 @@ _cpp_lex_direct (cpp_reader *pfile)
>      result->src_loc = linemap_position_for_column (pfile->line_table,
>                                           CPP_BUF_COLUMN (buffer, 
> buffer->cur));
>
> +  /* The token's src_range begins here.  */
> +  result->src_range.m_start = result->src_loc;
> +
>    switch (c)
>      {
>      case ' ': case '\t': case '\f': case '\v': case '\0':
> @@ -2723,6 +2726,11 @@ _cpp_lex_direct (cpp_reader *pfile)
>        break;
>      }
>
> +  /* The token's src_range ends here.  */
> +  result->src_range.m_finish =
> +    linemap_position_for_column (pfile->line_table,
> +                                CPP_BUF_COLUMN (buffer, buffer->cur));
> +
>    return result;
>  }
>
> --
> 1.8.5.3
>

Reply via email to