Author: Eduardo Caldas Date: 2020-07-10T16:21:11Z New Revision: f33c2c27a8d4ea831aa7c2c2649066be91318d85
URL: https://github.com/llvm/llvm-project/commit/f33c2c27a8d4ea831aa7c2c2649066be91318d85 DIFF: https://github.com/llvm/llvm-project/commit/f33c2c27a8d4ea831aa7c2c2649066be91318d85.diff LOG: Fix crash on `user defined literals` Summary: Given a UserDefinedLiteral `1.2_w`: Problem: Lexer generates one Token for the literal, but ClangAST references two source locations. Fix: Ignore the operator and interpret it as the underlying literal. e.g.: `1.2_w` token generates syntax node IntegerLiteral(1.2_w) Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D82157 Added: Modified: clang/include/clang/Tooling/Syntax/Nodes.h clang/lib/Tooling/Syntax/BuildTree.cpp clang/lib/Tooling/Syntax/Nodes.cpp clang/unittests/Tooling/Syntax/TreeTest.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h index 97605ceb76b7..fb63c36bc4cc 100644 --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -50,6 +50,11 @@ enum class NodeKind : uint16_t { StringLiteralExpression, BoolLiteralExpression, CxxNullPtrExpression, + UnknownUserDefinedLiteralExpression, + IntegerUserDefinedLiteralExpression, + FloatUserDefinedLiteralExpression, + CharUserDefinedLiteralExpression, + StringUserDefinedLiteralExpression, IdExpression, // Statements. @@ -325,6 +330,88 @@ class CxxNullPtrExpression final : public Expression { syntax::Leaf *nullPtrKeyword(); }; +/// Expression for user-defined literal. 
C++ [lex.ext] +/// user-defined-literal: +/// user-defined-integer-literal +/// user-defined-floating-point-literal +/// user-defined-string-literal +/// user-defined-character-literal +class UserDefinedLiteralExpression : public Expression { +public: + UserDefinedLiteralExpression(NodeKind K) : Expression(K) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownUserDefinedLiteralExpression || + N->kind() == NodeKind::IntegerUserDefinedLiteralExpression || + N->kind() == NodeKind::FloatUserDefinedLiteralExpression || + N->kind() == NodeKind::CharUserDefinedLiteralExpression || + N->kind() == NodeKind::StringUserDefinedLiteralExpression; + } + syntax::Leaf *literalToken(); +}; + +// We cannot yet distinguish between user-defined-integer-literal and +// user-defined-floating-point-literal, when using raw literal operator or +// numeric literal operator. C++ [lex.ext]p3, p4 +/// Expression for an unknown user-defined-literal. +class UnknownUserDefinedLiteralExpression final + : public UserDefinedLiteralExpression { +public: + UnknownUserDefinedLiteralExpression() + : UserDefinedLiteralExpression( + NodeKind::UnknownUserDefinedLiteralExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownUserDefinedLiteralExpression; + } +}; + +/// Expression for user-defined-integer-literal. C++ [lex.ext] +class IntegerUserDefinedLiteralExpression final + : public UserDefinedLiteralExpression { +public: + IntegerUserDefinedLiteralExpression() + : UserDefinedLiteralExpression( + NodeKind::IntegerUserDefinedLiteralExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::IntegerUserDefinedLiteralExpression; + } +}; + +/// Expression for user-defined-floating-point-literal. 
C++ [lex.ext] +class FloatUserDefinedLiteralExpression final + : public UserDefinedLiteralExpression { +public: + FloatUserDefinedLiteralExpression() + : UserDefinedLiteralExpression( + NodeKind::FloatUserDefinedLiteralExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::FloatUserDefinedLiteralExpression; + } +}; + +/// Expression for user-defined-character-literal. C++ [lex.ext] +class CharUserDefinedLiteralExpression final + : public UserDefinedLiteralExpression { +public: + CharUserDefinedLiteralExpression() + : UserDefinedLiteralExpression( + NodeKind::CharUserDefinedLiteralExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::CharUserDefinedLiteralExpression; + } +}; + +/// Expression for user-defined-string-literal. C++ [lex.ext] +class StringUserDefinedLiteralExpression final + : public UserDefinedLiteralExpression { +public: + StringUserDefinedLiteralExpression() + : UserDefinedLiteralExpression( + NodeKind::StringUserDefinedLiteralExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::StringUserDefinedLiteralExpression; + } +}; + /// An abstract class for prefix and postfix unary operators. 
class UnaryOperatorExpression : public Expression { public: diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index f9fdf47bff26..8204d3fc66f3 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -216,7 +216,8 @@ static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, } if (Initializer.isValid()) { auto InitializerEnd = Initializer.getEnd(); - assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || End == InitializerEnd); + assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || + End == InitializerEnd); End = InitializerEnd; } return SourceRange(Start, End); @@ -708,6 +709,42 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { return NNS; } + bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { + // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node + // referencing the location of the UDL suffix (`_w` in `1.2_w`). The + // UDL suffix location does not point to the beginning of a token, so we + // can't represent the UDL suffix as a separate syntax tree node. + + return WalkUpFromUserDefinedLiteral(S); + } + + syntax::NodeKind getUserDefinedLiteralKind(UserDefinedLiteral *S) { + switch (S->getLiteralOperatorKind()) { + case clang::UserDefinedLiteral::LOK_Integer: + return syntax::NodeKind::IntegerUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Floating: + return syntax::NodeKind::FloatUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Character: + return syntax::NodeKind::CharUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_String: + return syntax::NodeKind::StringUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Raw: + case clang::UserDefinedLiteral::LOK_Template: + // FIXME: Apply `NumericLiteralParser` to the underlying token to deduce + // the right UDL kind. That would require a `Preprocessor` though. 
+ return syntax::NodeKind::UnknownUserDefinedLiteralExpression; + } + } + + bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { + Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::UserDefinedLiteralExpression( + getUserDefinedLiteralKind(S)), + S); + return true; + } + bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { if (auto *NNS = BuildNestedNameSpecifier(S->getQualifierLoc())) Builder.markChild(NNS, syntax::NodeRole::IdExpression_qualifier); @@ -817,9 +854,9 @@ class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { if (getOperatorNodeKind(*S) == syntax::NodeKind::PostfixUnaryOperatorExpression) { - // A postfix unary operator is declared as taking two operands. The second - // operand is used to distinguish from its prefix counterpart. In the - // semantic AST this "phantom" operand is represented as a + // A postfix unary operator is declared as taking two operands. The + // second operand is used to distinguish from its prefix counterpart. In + // the semantic AST this "phantom" operand is represented as a // `IntegerLiteral` with invalid `SourceLocation`. 
We skip visiting this // operand because it does not correspond to anything written in source // code diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp index 3d9b943d6db1..e1aa2521a2a9 100644 --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -32,6 +32,16 @@ llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeKind K) { return OS << "BoolLiteralExpression"; case NodeKind::CxxNullPtrExpression: return OS << "CxxNullPtrExpression"; + case NodeKind::UnknownUserDefinedLiteralExpression: + return OS << "UnknownUserDefinedLiteralExpression"; + case NodeKind::IntegerUserDefinedLiteralExpression: + return OS << "IntegerUserDefinedLiteralExpression"; + case NodeKind::FloatUserDefinedLiteralExpression: + return OS << "FloatUserDefinedLiteralExpression"; + case NodeKind::CharUserDefinedLiteralExpression: + return OS << "CharUserDefinedLiteralExpression"; + case NodeKind::StringUserDefinedLiteralExpression: + return OS << "StringUserDefinedLiteralExpression"; case NodeKind::PrefixUnaryOperatorExpression: return OS << "PrefixUnaryOperatorExpression"; case NodeKind::PostfixUnaryOperatorExpression: @@ -252,6 +262,11 @@ syntax::Leaf *syntax::CxxNullPtrExpression::nullPtrKeyword() { findChild(syntax::NodeRole::LiteralToken)); } +syntax::Leaf *syntax::UserDefinedLiteralExpression::literalToken() { + return llvm::cast_or_null<syntax::Leaf>( + findChild(syntax::NodeRole::LiteralToken)); +} + syntax::Expression *syntax::BinaryOperatorExpression::lhs() { return llvm::cast_or_null<syntax::Expression>( findChild(syntax::NodeRole::BinaryOperatorExpression_leftHandSide)); diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp index acd0fbf2b52e..91e7a8f33e4e 100644 --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -1184,20 +1184,108 @@ void test() { )txt")); } -TEST_P(SyntaxTreeTest, IntegerLiteral) { 
+TEST_P(SyntaxTreeTest, UserDefinedLiteral) { + if (!GetParam().isCXX11OrLater()) { + return; + } EXPECT_TRUE(treeDumpEqual( R"cpp( +unsigned operator "" _i(unsigned long long); +unsigned operator "" _f(long double); +unsigned operator "" _c(char); + +unsigned operator "" _r(const char*); // raw-literal operator + +template <char...> +unsigned operator "" _t(); // numeric literal operator template + void test() { - 12; - 12u; - 12l; - 12ul; - 014; - 0XC; + 12_i; // call: operator "" _i(12uLL) | kind: integer + 1.2_f; // call: operator "" _f(1.2L) | kind: float + '2'_c; // call: operator "" _c('2') | kind: char + + // TODO: Generate `FloatUserDefinedLiteralExpression` and + // `IntegerUserDefinedLiteralExpression` instead of + // `UnknownUserDefinedLiteralExpression`. See `getUserDefinedLiteralKind` + 12_r; // call: operator "" _r("12") | kind: integer + 1.2_r; // call: operator "" _i("1.2") | kind: float + 12_t; // call: operator<'1', '2'> "" _x() | kind: integer + 1.2_t; // call: operator<'1', '2'> "" _x() | kind: float } -)cpp", + )cpp", R"txt( *: TranslationUnit +|-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_i +| | `-ParametersAndQualifiers +| | |-( +| | |-SimpleDeclaration +| | | |-unsigned +| | | |-long +| | | `-long +| | `-) +| `-; +|-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_f +| | `-ParametersAndQualifiers +| | |-( +| | |-SimpleDeclaration +| | | |-long +| | | `-double +| | `-) +| `-; +|-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_c +| | `-ParametersAndQualifiers +| | |-( +| | |-SimpleDeclaration +| | | `-char +| | `-) +| `-; +|-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_r +| | `-ParametersAndQualifiers +| | |-( +| | |-SimpleDeclaration +| | | |-const +| | | |-char +| | | `-SimpleDeclarator +| | | `-* +| | `-) +| `-; +|-TemplateDeclaration +| |-template +| |-< +| 
|-SimpleDeclaration +| | `-char +| |-... +| |-> +| `-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_t +| | `-ParametersAndQualifiers +| | |-( +| | `-) +| `-; `-SimpleDeclaration |-void |-SimpleDeclarator @@ -1208,28 +1296,95 @@ void test() { `-CompoundStatement |-{ |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-12 + | |-IntegerUserDefinedLiteralExpression + | | `-12_i | `-; |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-12u + | |-FloatUserDefinedLiteralExpression + | | `-1.2_f | `-; |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-12l + | |-CharUserDefinedLiteralExpression + | | `-'2'_c | `-; |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-12ul + | |-UnknownUserDefinedLiteralExpression + | | `-12_r | `-; |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-014 + | |-UnknownUserDefinedLiteralExpression + | | `-1.2_r | `-; |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-0XC + | |-UnknownUserDefinedLiteralExpression + | | `-12_t + | `-; + |-ExpressionStatement + | |-UnknownUserDefinedLiteralExpression + | | `-1.2_t + | `-; + `-} +)txt")); +} + +TEST_P(SyntaxTreeTest, UserDefinedLiteralString) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +typedef decltype(sizeof(void *)) size_t; +unsigned operator "" _s(const char*, size_t); +void test() { + "12"_s;// call: operator "" _s("12") | kind: string +} + )cpp", + R"txt( +*: TranslationUnit +|-SimpleDeclaration +| |-typedef +| |-decltype +| |-( +| |-UnknownExpression +| | |-sizeof +| | |-( +| | |-void +| | |-* +| | `-) +| |-) +| |-SimpleDeclarator +| | `-size_t +| `-; +|-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_s +| | `-ParametersAndQualifiers +| | |-( +| | |-SimpleDeclaration +| | | |-const +| | | |-char +| | | `-SimpleDeclarator +| | | `-* +| | |-, +| | |-SimpleDeclaration +| | | `-size_t +| | `-) +| `-; 
+`-SimpleDeclaration + |-void + |-SimpleDeclarator + | |-test + | `-ParametersAndQualifiers + | |-( + | `-) + `-CompoundStatement + |-{ + |-ExpressionStatement + | |-StringUserDefinedLiteralExpression + | | `-"12"_s | `-; `-} )txt")); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits