https://github.com/owenca created https://github.com/llvm/llvm-project/pull/79115
Depends on #79037. >From 61a06b77e8e6f2b5c5a9a2aa0b7f46260545f5b4 Mon Sep 17 00:00:00 2001 From: Carl Peto <cp...@becrypt.com> Date: Mon, 22 Jan 2024 18:52:46 +0000 Subject: [PATCH 1/2] [clang] - Sema::isSimpleTypeSpecifier return true for _Bool in c99 (currently returns false for _Bool, regardless of C dialect). (Fixes #72203) - move simple type decision code into shared location (IdentifierInfo) - replace the logic with a check for simple types and a proper check for a valid keyword in the appropriate dialect - change all call sites to match the above new API --- clang/include/clang/Basic/IdentifierTable.h | 4 ++ clang/include/clang/Sema/Sema.h | 2 +- clang/lib/Basic/IdentifierTable.cpp | 39 +++++++++++++++++ clang/lib/Parse/ParseExpr.cpp | 3 +- clang/lib/Parse/ParseObjc.cpp | 3 +- clang/lib/Sema/SemaDecl.cpp | 48 +-------------------- 6 files changed, 50 insertions(+), 49 deletions(-) diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h index 1ac182d4fce26f..2c979e438e81bb 100644 --- a/clang/include/clang/Basic/IdentifierTable.h +++ b/clang/include/clang/Basic/IdentifierTable.h @@ -427,6 +427,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo { /// language. bool isCPlusPlusKeyword(const LangOptions &LangOpts) const; + /// Return true if this token is a simple type specifier + /// in the specified language. + bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const; + /// Get and set FETokenInfo. The language front-end is allowed to associate /// arbitrary metadata with this token. void *getFETokenInfo() const { return FETokenInfo; } diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 0db39333b0ee34..bc1fd19b5c6de7 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2636,7 +2636,7 @@ class Sema final { void DiagnoseUseOfUnimplementedSelectors(); - bool isSimpleTypeSpecifier(tok::TokenKind Kind) const; + bool isSimpleTypeSpecifier(const IdentifierInfo &II) const; ParsedType getTypeName(const IdentifierInfo &II, SourceLocation NameLoc, Scope *S, CXXScopeSpec *SS = nullptr, diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index d0d8316385b452..78c783cdff6b5e 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -419,6 +419,45 @@ StringRef IdentifierInfo::deuglifiedName() const { return Name; } +/// Determine whether the token kind starts a simple-type-specifier. +bool IdentifierInfo::isSimpleTypeSpecifier(const LangOptions &LangOpts) const { + auto Kind = getTokenID(); + + switch (Kind) { + case tok::kw_short: + case tok::kw_long: + case tok::kw___int64: + case tok::kw___int128: + case tok::kw_signed: + case tok::kw_unsigned: + case tok::kw_void: + case tok::kw_char: + case tok::kw_int: + case tok::kw_half: + case tok::kw_float: + case tok::kw_double: + case tok::kw___bf16: + case tok::kw__Float16: + case tok::kw___float128: + case tok::kw_wchar_t: + case tok::kw_bool: + case tok::kw___underlying_type: + case tok::kw___auto_type: + case tok::kw__Bool: + case tok::annot_typename: + case tok::kw_char16_t: + case tok::kw_char32_t: + case tok::kw_typeof: + case tok::annot_decltype: + case tok::kw_decltype: + case tok::kw_char8_t: + return isKeyword(LangOpts); + + default: + return false; + } +} + tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { // We use a perfect hash function here involving the length of the keyword, // the first and third character. For preprocessor ID's there are no diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index e862856a08ca11..8f9f918bf544fd 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -1597,7 +1597,8 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, if (TryAnnotateTypeOrScopeToken()) return ExprError(); - if (!Actions.isSimpleTypeSpecifier(Tok.getKind())) + if (!Tok.getIdentifierInfo() || + !Actions.isSimpleTypeSpecifier(*Tok.getIdentifierInfo())) // We are trying to parse a simple-type-specifier but might not get such // a token after error recovery. return ExprError(); diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp index 849fd1ac95a442..5565770610c491 100644 --- a/clang/lib/Parse/ParseObjc.cpp +++ b/clang/lib/Parse/ParseObjc.cpp @@ -2971,7 +2971,8 @@ bool Parser::ParseObjCXXMessageReceiver(bool &IsExpr, void *&TypeOrExpr) { tok::annot_cxxscope)) TryAnnotateTypeOrScopeToken(); - if (!Actions.isSimpleTypeSpecifier(Tok.getKind())) { + if (!Tok.getIdentifierInfo() || + !Actions.isSimpleTypeSpecifier(*Tok.getIdentifierInfo())) { // objc-receiver: // expression // Make sure any typos in the receiver are corrected or diagnosed, so that diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 8dff2cdc063df3..a20894adda00f9 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -128,52 +128,8 @@ class TypeNameValidatorCCC final : public CorrectionCandidateCallback { } // end anonymous namespace /// Determine whether the token kind starts a simple-type-specifier. -bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const { - switch (Kind) { - // FIXME: Take into account the current language when deciding whether a - // token kind is a valid type specifier - case tok::kw_short: - case tok::kw_long: - case tok::kw___int64: - case tok::kw___int128: - case tok::kw_signed: - case tok::kw_unsigned: - case tok::kw_void: - case tok::kw_char: - case tok::kw_int: - case tok::kw_half: - case tok::kw_float: - case tok::kw_double: - case tok::kw___bf16: - case tok::kw__Float16: - case tok::kw___float128: - case tok::kw___ibm128: - case tok::kw_wchar_t: - case tok::kw_bool: - case tok::kw__Accum: - case tok::kw__Fract: - case tok::kw__Sat: -#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait: -#include "clang/Basic/TransformTypeTraits.def" - case tok::kw___auto_type: - return true; - - case tok::annot_typename: - case tok::kw_char16_t: - case tok::kw_char32_t: - case tok::kw_typeof: - case tok::annot_decltype: - case tok::kw_decltype: - return getLangOpts().CPlusPlus; - - case tok::kw_char8_t: - return getLangOpts().Char8; - - default: - break; - } - - return false; +bool Sema::isSimpleTypeSpecifier(const IdentifierInfo &II) const { + return II.isSimpleTypeSpecifier(getLangOpts()); } namespace { >From 40645124c612cec71f5753fa2e97c6a01406f86a Mon Sep 17 00:00:00 2001 From: Owen Pan <owenpi...@gmail.com> Date: Tue, 23 Jan 2024 02:06:49 -0800 Subject: [PATCH 2/2] [clang-format] Update FormatToken::isSimpleTypeSpecifier() --- clang/include/clang/Format/Format.h | 2 ++ clang/lib/Format/FormatToken.cpp | 36 ++------------------------- clang/lib/Format/FormatTokenLexer.cpp | 7 +++--- clang/lib/Format/FormatTokenLexer.h | 1 - 4 files changed, 8 insertions(+), 38 deletions(-) diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index bc9eecd42f9ebf..2800f6db4a9786 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -5160,6 +5160,8 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, ArrayRef<tooling::Range> Ranges, StringRef FileName = "<stdin>"); +extern LangOptions LangOpts; + /// Returns the ``LangOpts`` that the formatter expects you to set. /// /// \param Style determines specific settings for lexing mode. diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index b791c5a26bbe3a..a030d91d5589a5 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -34,41 +34,9 @@ const char *getTokenTypeName(TokenType Type) { return nullptr; } -// FIXME: This is copy&pasted from Sema. Put it in a common place and remove -// duplication. bool FormatToken::isSimpleTypeSpecifier() const { - switch (Tok.getKind()) { - case tok::kw_short: - case tok::kw_long: - case tok::kw___int64: - case tok::kw___int128: - case tok::kw_signed: - case tok::kw_unsigned: - case tok::kw_void: - case tok::kw_char: - case tok::kw_int: - case tok::kw_half: - case tok::kw_float: - case tok::kw_double: - case tok::kw___bf16: - case tok::kw__Float16: - case tok::kw___float128: - case tok::kw___ibm128: - case tok::kw_wchar_t: - case tok::kw_bool: -#define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait: -#include "clang/Basic/TransformTypeTraits.def" - case tok::annot_typename: - case tok::kw_char8_t: - case tok::kw_char16_t: - case tok::kw_char32_t: - case tok::kw_typeof: - case tok::kw_decltype: - case tok::kw__Atomic: - return true; - default: - return false; - } + const auto *IdentifierInfo = Tok.getIdentifierInfo(); + return IdentifierInfo && IdentifierInfo->isSimpleTypeSpecifier(LangOpts); } bool FormatToken::isTypeOrIdentifier() const { diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 52a55ea23b5f2f..e6575dd417438a 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -22,18 +22,20 @@ namespace clang { namespace format { +LangOptions LangOpts; + FormatTokenLexer::FormatTokenLexer( const SourceManager &SourceMgr, FileID ID, unsigned Column, const FormatStyle &Style, encoding::Encoding Encoding, llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, IdentifierTable &IdentTable) : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), - Column(Column), TrailingWhitespace(0), - LangOpts(getFormattingLangOpts(Style)), SourceMgr(SourceMgr), ID(ID), + Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), Style(Style), IdentTable(IdentTable), Keywords(IdentTable), Encoding(Encoding), Allocator(Allocator), FirstInLineIndex(0), FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), MacroBlockEndRegex(Style.MacroBlockEnd) { + LangOpts = getFormattingLangOpts(Style); Lex.reset(new Lexer(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts)); Lex->SetKeepWhitespaceMode(true); @@ -1411,7 +1413,6 @@ void FormatTokenLexer::readRawToken(FormatToken &Tok) { void FormatTokenLexer::resetLexer(unsigned Offset) { StringRef Buffer = SourceMgr.getBufferData(ID); - LangOpts = getFormattingLangOpts(Style); Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), LangOpts, Buffer.begin(), Buffer.begin() + Offset, Buffer.end())); Lex->SetKeepWhitespaceMode(true); diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index 65dd733bd53352..52838f1d8a17f5 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -120,7 +120,6 @@ class FormatTokenLexer { unsigned Column; unsigned TrailingWhitespace; std::unique_ptr<Lexer> Lex; - LangOptions LangOpts; const SourceManager &SourceMgr; FileID ID; const FormatStyle &Style; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits