Author: Haojian Wu Date: 2022-07-22T09:13:09+02:00 New Revision: 2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d
URL: https://github.com/llvm/llvm-project/commit/2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d DIFF: https://github.com/llvm/llvm-project/commit/2a88fb2ecb72300bfbbc74c586fb415cc18c9f9d.diff LOG: [pseudo] Eliminate the dangling-else syntax ambiguity. - the grammar ambiguity is eliminated by a guard; - modify the guard function signatures: all parameters are now folded into a single object, avoiding a long parameter list (as we will add more parameters in the near future); Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D130160 Added: clang-tools-extra/pseudo/test/cxx/dangling-else.cpp Modified: clang-tools-extra/pseudo/include/clang-pseudo/Language.h clang-tools-extra/pseudo/lib/GLR.cpp clang-tools-extra/pseudo/lib/cxx/CXX.cpp clang-tools-extra/pseudo/lib/cxx/cxx.bnf clang-tools-extra/pseudo/unittests/GLRTest.cpp Removed: ################################################################################ diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h index 3696543915cba..1a2b71f081da0 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h @@ -19,6 +19,12 @@ class ForestNode; class TokenStream; class LRTable; +struct GuardParams { + llvm::ArrayRef<const ForestNode *> RHS; + const TokenStream &Tokens; + // FIXME: use the index of Tokens. + SymbolID Lookahead; +}; // A guard restricts when a grammar rule can be used. // // The GLR parser will use the guard to determine whether a rule reduction will @@ -26,8 +32,7 @@ class LRTable; // `virt-specifier := IDENTIFIER` only if the identifier's text is 'override`. // // Return true if the guard is satisfied. 
-using RuleGuard = llvm::function_ref<bool( - llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>; +using RuleGuard = llvm::function_ref<bool(const GuardParams &)>; // A recovery strategy determines a region of code to skip when parsing fails. // diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp index df8381d04326b..ab230accdf8f8 100644 --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -421,7 +421,7 @@ class GLRReduce { if (!R.Guarded) return true; if (auto Guard = Lang.Guards.lookup(RID)) - return Guard(RHS, Params.Code); + return Guard({RHS, Params.Code, Lookahead}); LLVM_DEBUG(llvm::dbgs() << llvm::formatv("missing guard implementation for rule {0}\n", Lang.G.dumpRule(RID))); diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp index 8fa24bfbbd0b5..7fc3a48d63189 100644 --- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp +++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -156,15 +156,19 @@ bool isFunctionDeclarator(const ForestNode *Declarator) { llvm_unreachable("unreachable"); } +bool guardNextTokenNotElse(const GuardParams &P) { + return symbolToToken(P.Lookahead) != tok::kw_else; +} + llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() { #define TOKEN_GUARD(kind, cond) \ - [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { \ - const Token &Tok = onlyToken(tok::kind, RHS, Tokens); \ + [](const GuardParams& P) { \ + const Token &Tok = onlyToken(tok::kind, P.RHS, P.Tokens); \ return cond; \ } #define SYMBOL_GUARD(kind, cond) \ - [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { \ - const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, RHS, Tokens); \ + [](const GuardParams& P) { \ + const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, P.RHS, P.Tokens); \ return cond; \ } return { @@ -186,6 +190,11 @@ llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() { 
{(RuleID)Rule::contextual_zero_0numeric_constant, TOKEN_GUARD(numeric_constant, Tok.text() == "0")}, + {(RuleID)Rule::selection_statement_0if_1l_paren_2condition_3r_paren_4statement, + guardNextTokenNotElse}, + {(RuleID)Rule::selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement, + guardNextTokenNotElse}, + // The grammar distinguishes (only) user-defined vs plain string literals, // where the clang lexer distinguishes (only) encoding types. {(RuleID)Rule::user_defined_string_literal_chunk_0string_literal, diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf index d49fb8fb7cf42..8138d0fd481ed 100644 --- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf +++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf @@ -290,7 +290,7 @@ expression-statement := expression_opt ; compound-statement := { statement-seq_opt [recover=Brackets] } statement-seq := statement statement-seq := statement-seq statement -selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement +selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement [guard] selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement ELSE statement selection-statement := SWITCH ( init-statement_opt condition ) statement iteration-statement := WHILE ( condition ) statement diff --git a/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp new file mode 100644 index 0000000000000..151f3931b53f9 --- /dev/null +++ b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp @@ -0,0 +1,22 @@ +// RUN: clang-pseudo -grammar=cxx -source=%s --start-symbol=statement-seq --print-forest | FileCheck %s + +// Verify the else should belong to the nested if statement +if (true) if (true) {} else {} + +// CHECK: statement-seq~selection-statement := IF ( condition ) statement +// CHECK-NEXT: ├─IF +// CHECK-NEXT: ├─( +// CHECK-NEXT: ├─condition~TRUE +// CHECK-NEXT: ├─) +// 
CHECK-NEXT: └─statement~selection-statement +// CHECK-NEXT: ├─IF +// CHECK-NEXT: ├─( +// CHECK-NEXT: ├─condition~TRUE +// CHECK-NEXT: ├─) +// CHECK-NEXT: ├─statement~compound-statement := { } +// CHECK-NEXT: │ ├─{ +// CHECK-NEXT: │ └─} +// CHECK-NEXT: ├─ELSE +// CHECK-NEXT: └─statement~compound-statement := { } +// CHECK-NEXT: ├─{ +// CHECK-NEXT: └─} diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp index 2c3ef265de392..5f87efec67044 100644 --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -634,11 +634,12 @@ TEST_F(GLRTest, GuardExtension) { start := IDENTIFIER [guard] )bnf"); TestLang.Guards.try_emplace( - ruleFor("start"), - [&](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) { - assert(RHS.size() == 1 && - RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test"; + ruleFor("start"), [&](const GuardParams &P) { + assert(P.RHS.size() == 1 && + P.RHS.front()->symbol() == + tokenSymbol(clang::tok::identifier)); + return P.Tokens.tokens()[P.RHS.front()->startTokenIndex()] + .text() == "test"; }); clang::LangOptions LOptions; TestLang.Table = LRTable::buildSLR(TestLang.G); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits