On Thu, Sep 10, 2015 at 10:28 PM, David Malcolm <dmalc...@redhat.com> wrote: > This patch adds source *range* information to libcpp's cpp_token, and to > c_token and cp_token in the C and C++ frontends respectively. > > To minimize churn, I kept the existing location_t fields, though in > theory these are always just equal to the start of the source range. > > cpplib.h's struct cpp_token had this comment: > > /* A preprocessing token. This has been carefully packed and should > occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts. */ > > Does anyone know why this was "carefully packed" and to what extent > this matters? I'm adding an extra 8 bytes to it (or 4 if we eliminate > the existing location_t). As far as I can see, these are > short-lived, and there are only relative few alive at any time. > Or is it about making them fast to copy? > > gcc/c-family/ChangeLog: > * c-lex.c (c_lex_with_flags): Add "range" param, and write back > to *range with the range of the libcpp token. > * c-pragma.h (c_lex_with_flags): Add "range" param. > > gcc/c/ChangeLog: > * c-parser.c (struct c_token): Add "range" field. > (c_lex_one_token): Write back to token->range in call to > c_lex_with_flags. > > gcc/cp/ChangeLog: > * parser.c (eof_token): Add "range" field to initializer. > (cp_lexer_get_preprocessor_token): Write back to token->range in > call to c_lex_with_flags. > * parser.h (struct cp_token): Add "range" field. > > libcpp/ChangeLog: > * include/cpplib.h (struct cpp_token): Add src_range field. > * lex.c (_cpp_lex_direct): Set up the src_range on the token. > --- > gcc/c-family/c-lex.c | 7 +++++-- > gcc/c-family/c-pragma.h | 4 ++-- > gcc/c/c-parser.c | 6 +++++- > gcc/cp/parser.c | 5 +++-- > gcc/cp/parser.h | 2 ++ > libcpp/include/cpplib.h | 4 +++- > libcpp/lex.c | 8 ++++++++ > 7 files changed, 28 insertions(+), 8 deletions(-) > > diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c > index 55ceb20..1334994 100644 > --- a/gcc/c-family/c-lex.c > +++ b/gcc/c-family/c-lex.c > @@ -380,11 +380,13 @@ c_common_has_attribute (cpp_reader *pfile) > } > > /* Read a token and return its type. Fill *VALUE with its value, if > - applicable. Fill *CPP_FLAGS with the token's flags, if it is > + applicable. Fill *LOC and *RANGE with the source location and range > + of the token. Fill *CPP_FLAGS with the token's flags, if it is > non-NULL. */ > > enum cpp_ttype > -c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags, > +c_lex_with_flags (tree *value, location_t *loc, source_range *range, > + unsigned char *cpp_flags, > int lex_flags) > { > static bool no_more_pch; > @@ -397,6 +399,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned > char *cpp_flags, > retry: > tok = cpp_get_token_with_location (parse_in, loc); > type = tok->type; > + *range = tok->src_range; > > retry_after_at: > switch (type) > diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h > index aa2b471..05df543 100644 > --- a/gcc/c-family/c-pragma.h > +++ b/gcc/c-family/c-pragma.h > @@ -225,8 +225,8 @@ extern enum cpp_ttype pragma_lex (tree *); > /* This is not actually available to pragma parsers. It's merely a > convenient location to declare this function for c-lex, after > having enum cpp_ttype declared. */ > -extern enum cpp_ttype c_lex_with_flags (tree *, location_t *, unsigned char > *, > - int); > +extern enum cpp_ttype c_lex_with_flags (tree *, location_t *, source_range *, > + unsigned char *, int); > > extern void c_pp_lookup_pragma (unsigned int, const char **, const char **); > > diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c > index 11a2b0f..5d822ee 100644 > --- a/gcc/c/c-parser.c > +++ b/gcc/c/c-parser.c > @@ -170,6 +170,8 @@ struct GTY (()) c_token { > ENUM_BITFIELD (pragma_kind) pragma_kind : 8; > /* The location at which this token was found. */ > location_t location; > + /* The source range at which this token was found. */ > + source_range range; > /* The value associated with this token, if any. */ > tree value; > }; > @@ -239,7 +241,9 @@ c_lex_one_token (c_parser *parser, c_token *token) > { > timevar_push (TV_LEX); > > - token->type = c_lex_with_flags (&token->value, &token->location, NULL, > + token->type = c_lex_with_flags (&token->value, &token->location, > + &token->range, > + NULL, > (parser->lex_untranslated_string > ? C_LEX_STRING_NO_TRANSLATE : 0)); > token->id_kind = C_ID_NONE; > diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c > index 67fbcda..7c59c58 100644 > --- a/gcc/cp/parser.c > +++ b/gcc/cp/parser.c > @@ -58,7 +58,7 @@ along with GCC; see the file COPYING3. If not see > > static cp_token eof_token = > { > - CPP_EOF, RID_MAX, 0, PRAGMA_NONE, false, false, false, 0, { NULL } > + CPP_EOF, RID_MAX, 0, PRAGMA_NONE, false, false, false, 0, {0, 0}, { NULL } > }; > > /* The various kinds of non integral constant we encounter. */ > @@ -764,7 +764,8 @@ cp_lexer_get_preprocessor_token (cp_lexer *lexer, > cp_token *token) > > /* Get a new token from the preprocessor. */ > token->type > - = c_lex_with_flags (&token->u.value, &token->location, &token->flags, > + = c_lex_with_flags (&token->u.value, &token->location, > + &token->range, &token->flags, > lexer == NULL ? 0 : C_LEX_STRING_NO_JOIN); > token->keyword = RID_MAX; > token->pragma_kind = PRAGMA_NONE; > diff --git a/gcc/cp/parser.h b/gcc/cp/parser.h > index 760467c..c7558a0 100644 > --- a/gcc/cp/parser.h > +++ b/gcc/cp/parser.h > @@ -61,6 +61,8 @@ struct GTY (()) cp_token { > BOOL_BITFIELD purged_p : 1; > /* The location at which this token was found. */ > location_t location; > + /* The source range at which this token was found. */ > + source_range range;
Is it just me or does location now feel somewhat redundant with range? Can't we compress that somehow? > /* The value associated with this token, if any. */ > union cp_token_value { > /* Used for CPP_NESTED_NAME_SPECIFIER and CPP_TEMPLATE_ID. */ > diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h > index a2bdfa0..0b1a403 100644 > --- a/libcpp/include/cpplib.h > +++ b/libcpp/include/cpplib.h > @@ -235,9 +235,11 @@ struct GTY(()) cpp_identifier { > }; > > /* A preprocessing token. This has been carefully packed and should > - occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts. */ > + occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts. > + FIXME: the above comment is no longer true with this patch. */ > struct GTY(()) cpp_token { > source_location src_loc; /* Location of first char of token. */ > + source_range src_range; /* Source range covered by the token. */ > ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT; /* token type */ > unsigned short flags; /* flags - see above */ > > diff --git a/libcpp/lex.c b/libcpp/lex.c > index 0aa1090..a84a8c0 100644 > --- a/libcpp/lex.c > +++ b/libcpp/lex.c > @@ -2365,6 +2365,9 @@ _cpp_lex_direct (cpp_reader *pfile) > result->src_loc = linemap_position_for_column (pfile->line_table, > CPP_BUF_COLUMN (buffer, > buffer->cur)); > > + /* The token's src_range begins here. */ > + result->src_range.m_start = result->src_loc; > + > switch (c) > { > case ' ': case '\t': case '\f': case '\v': case '\0': > @@ -2723,6 +2726,11 @@ _cpp_lex_direct (cpp_reader *pfile) > break; > } > > + /* The token's src_range ends here. */ > + result->src_range.m_finish = > + linemap_position_for_column (pfile->line_table, > + CPP_BUF_COLUMN (buffer, buffer->cur)); > + > return result; > } > > -- > 1.8.5.3 >