This patch adds source *range* information to libcpp's cpp_token, and to
c_token and cp_token in the C and C++ frontends respectively.
To minimize churn, I kept the existing location_t fields, though in
theory these are always just equal to the start of the source range.
cpplib.h's struct cpp_token had this comment:
/* A preprocessing token. This has been carefully packed and should
occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts. */
Does anyone know why this was "carefully packed" and to what extent
this matters? I'm adding an extra 8 bytes to it (or 4 if we eliminate
the existing location_t). As far as I can see, these are
short-lived, and there are only relatively few alive at any time.
Or is it about making them fast to copy?
gcc/c-family/ChangeLog:
* c-lex.c (c_lex_with_flags): Add "range" param, and write back
to *range with the range of the libcpp token.
* c-pragma.h (c_lex_with_flags): Add "range" param.
gcc/c/ChangeLog:
* c-parser.c (struct c_token): Add "range" field.
(c_lex_one_token): Write back to token->range in call to
c_lex_with_flags.
gcc/cp/ChangeLog:
* parser.c (eof_token): Add "range" field to initializer.
(cp_lexer_get_preprocessor_token): Write back to token->range in
call to c_lex_with_flags.
* parser.h (struct cp_token): Add "range" field.
libcpp/ChangeLog:
* include/cpplib.h (struct cpp_token): Add src_range field.
* lex.c (_cpp_lex_direct): Set up the src_range on the token.
---
gcc/c-family/c-lex.c | 7 +++++--
gcc/c-family/c-pragma.h | 4 ++--
gcc/c/c-parser.c | 6 +++++-
gcc/cp/parser.c | 5 +++--
gcc/cp/parser.h | 2 ++
libcpp/include/cpplib.h | 4 +++-
libcpp/lex.c | 8 ++++++++
7 files changed, 28 insertions(+), 8 deletions(-)
diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c
index 55ceb20..1334994 100644
--- a/gcc/c-family/c-lex.c
+++ b/gcc/c-family/c-lex.c
@@ -380,11 +380,13 @@ c_common_has_attribute (cpp_reader *pfile)
}
/* Read a token and return its type. Fill *VALUE with its value, if
- applicable. Fill *CPP_FLAGS with the token's flags, if it is
+ applicable. Fill *LOC and *RANGE with the source location and range
+ of the token. Fill *CPP_FLAGS with the token's flags, if it is
non-NULL. */
enum cpp_ttype
-c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
+c_lex_with_flags (tree *value, location_t *loc, source_range *range,
+ unsigned char *cpp_flags,
int lex_flags)
{
static bool no_more_pch;
@@ -397,6 +399,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned
char *cpp_flags,
retry:
tok = cpp_get_token_with_location (parse_in, loc);
type = tok->type;
+ *range = tok->src_range;
retry_after_at:
switch (type)
diff --git a/gcc/c-family/c-pragma.h b/gcc/c-family/c-pragma.h
index aa2b471..05df543 100644
--- a/gcc/c-family/c-pragma.h
+++ b/gcc/c-family/c-pragma.h
@@ -225,8 +225,8 @@ extern enum cpp_ttype pragma_lex (tree *);
/* This is not actually available to pragma parsers. It's merely a
convenient location to declare this function for c-lex, after
having enum cpp_ttype declared. */
-extern enum cpp_ttype c_lex_with_flags (tree *, location_t *, unsigned char *,
- int);
+extern enum cpp_ttype c_lex_with_flags (tree *, location_t *, source_range *,
+ unsigned char *, int);
extern void c_pp_lookup_pragma (unsigned int, const char **, const char **);
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 11a2b0f..5d822ee 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -170,6 +170,8 @@ struct GTY (()) c_token {
ENUM_BITFIELD (pragma_kind) pragma_kind : 8;
/* The location at which this token was found. */
location_t location;
+ /* The source range at which this token was found. */
+ source_range range;
/* The value associated with this token, if any. */
tree value;
};
@@ -239,7 +241,9 @@ c_lex_one_token (c_parser *parser, c_token *token)
{
timevar_push (TV_LEX);
- token->type = c_lex_with_flags (&token->value, &token->location, NULL,
+ token->type = c_lex_with_flags (&token->value, &token->location,
+ &token->range,
+ NULL,
(parser->lex_untranslated_string
? C_LEX_STRING_NO_TRANSLATE : 0));
token->id_kind = C_ID_NONE;
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 67fbcda..7c59c58 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -58,7 +58,7 @@ along with GCC; see the file COPYING3. If not see
static cp_token eof_token =
{
- CPP_EOF, RID_MAX, 0, PRAGMA_NONE, false, false, false, 0, { NULL }
+ CPP_EOF, RID_MAX, 0, PRAGMA_NONE, false, false, false, 0, {0, 0}, { NULL }
};
/* The various kinds of non integral constant we encounter. */
@@ -764,7 +764,8 @@ cp_lexer_get_preprocessor_token (cp_lexer *lexer, cp_token
*token)
/* Get a new token from the preprocessor. */
token->type
- = c_lex_with_flags (&token->u.value, &token->location, &token->flags,
+ = c_lex_with_flags (&token->u.value, &token->location,
+ &token->range, &token->flags,
lexer == NULL ? 0 : C_LEX_STRING_NO_JOIN);
token->keyword = RID_MAX;
token->pragma_kind = PRAGMA_NONE;
diff --git a/gcc/cp/parser.h b/gcc/cp/parser.h
index 760467c..c7558a0 100644
--- a/gcc/cp/parser.h
+++ b/gcc/cp/parser.h
@@ -61,6 +61,8 @@ struct GTY (()) cp_token {
BOOL_BITFIELD purged_p : 1;
/* The location at which this token was found. */
location_t location;
+ /* The source range at which this token was found. */
+ source_range range;
/* The value associated with this token, if any. */
union cp_token_value {
/* Used for CPP_NESTED_NAME_SPECIFIER and CPP_TEMPLATE_ID. */
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index a2bdfa0..0b1a403 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -235,9 +235,11 @@ struct GTY(()) cpp_identifier {
};
/* A preprocessing token. This has been carefully packed and should
- occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts. */
+ occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts.
+ FIXME: the above comment is no longer true with this patch. */
struct GTY(()) cpp_token {
source_location src_loc; /* Location of first char of token. */
+ source_range src_range; /* Source range covered by the token. */
ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT; /* token type */
unsigned short flags; /* flags - see above */
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 0aa1090..a84a8c0 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -2365,6 +2365,9 @@ _cpp_lex_direct (cpp_reader *pfile)
result->src_loc = linemap_position_for_column (pfile->line_table,
CPP_BUF_COLUMN (buffer, buffer->cur));
+ /* The token's src_range begins here. */
+ result->src_range.m_start = result->src_loc;
+
switch (c)
{
case ' ': case '\t': case '\f': case '\v': case '\0':
@@ -2723,6 +2726,11 @@ _cpp_lex_direct (cpp_reader *pfile)
break;
}
+ /* The token's src_range ends here. */
+ result->src_range.m_finish =
+ linemap_position_for_column (pfile->line_table,
+ CPP_BUF_COLUMN (buffer, buffer->cur));
+
return result;
}
--
1.8.5.3