================ @@ -538,11 +565,254 @@ std::optional<InvalidName> checkName(const NamedDecl &RenameDecl, Conflict->getLocation().printToString(ASTCtx.getSourceManager())}; } } - if (Result) + if (Result) { InvalidNameMetric.record(1, toString(Result->K)); + return makeError(*Result); + } + return std::nullopt; +} + +bool isMatchingSelectorName(const syntax::Token &Cur, const syntax::Token &Next, + const SourceManager &SM, + llvm::StringRef SelectorName) { + if (SelectorName.empty()) + return Cur.kind() == tok::colon; + return Cur.kind() == tok::identifier && Next.kind() == tok::colon && + Cur.text(SM) == SelectorName && + // We require the selector name and : to be contiguous to avoid + // potential conflicts with ternary expression. + // + // e.g. support `foo:` but not `foo :`. + Cur.endLocation() == Next.location(); +} + +bool isSelectorLike(const syntax::Token &Cur, const syntax::Token &Next) { + return Cur.kind() == tok::identifier && Next.kind() == tok::colon && + // We require the selector name and : to be contiguous. + // e.g. support `foo:` but not `foo :`. + Cur.endLocation() == Next.location(); +} + +bool parseMessageExpression(llvm::ArrayRef<syntax::Token> Tokens, + const SourceManager &SM, unsigned Index, + unsigned Last, Selector Sel, + std::vector<Range> &SelectorPieces) { + + unsigned NumArgs = Sel.getNumArgs(); + llvm::SmallVector<char, 8> Closes; + SelectorPieces.clear(); + while (Index < Last) { + const auto &Tok = Tokens[Index]; + + if (Closes.empty()) { + auto PieceCount = SelectorPieces.size(); + if (PieceCount < NumArgs && + isMatchingSelectorName(Tok, Tokens[Index + 1], SM, + Sel.getNameForSlot(PieceCount))) { + // If 'foo:' instead of ':' (empty selector), we need to skip the ':' + // token after the name. 
+ if (!Sel.getNameForSlot(PieceCount).empty()) { + ++Index; + } + SelectorPieces.push_back( + halfOpenToRange(SM, Tok.range(SM).toCharRange(SM))); + continue; + } + // If we've found all pieces but the current token looks like another + // selector piece, it means the method being renamed is a strict prefix of + // the selector we've found - should be skipped. + if (SelectorPieces.size() >= NumArgs && + isSelectorLike(Tok, Tokens[Index + 1])) + return false; + } + + switch (Tok.kind()) { + case tok::l_square: + Closes.push_back(']'); + break; + case tok::l_paren: + Closes.push_back(')'); + break; + case tok::l_brace: + Closes.push_back('}'); + break; + case tok::r_square: + if (Closes.empty()) + return SelectorPieces.size() == NumArgs; + + if (Closes.back() != ']') + return false; + Closes.pop_back(); + break; + case tok::r_paren: + if (Closes.empty() || Closes.back() != ')') + return false; + Closes.pop_back(); + break; + case tok::r_brace: + if (Closes.empty() || Closes.back() != '}') + return false; + Closes.pop_back(); + break; + case tok::semi: + // top level ; ends all statements. + if (Closes.empty()) + return false; + break; + default: + break; + } + + ++Index; + } + return false; +} + +/// Collects all ranges of the given identifier/selector in the source code. +/// +/// If a selector is given, this does a full lex of the given source code in +/// order to identify all selector fragments (e.g. in method exprs/decls) since +/// they are non-contiguous. +std::vector<SymbolRange> collectRenameIdentifierRanges( + llvm::StringRef Identifier, llvm::StringRef Content, + const LangOptions &LangOpts, std::optional<Selector> Selector) { + std::vector<SymbolRange> Ranges; + if (!Selector) { + auto IdentifierRanges = + collectIdentifierRanges(Identifier, Content, LangOpts); + for (const auto &R : IdentifierRanges) + Ranges.emplace_back(R); + return Ranges; + } + // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated! 
+ std::string NullTerminatedCode = Content.str(); + SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode); + auto &SM = FileSM.get(); + + // We track parens and braces to ensure that we don't accidentally try parsing + // a method declaration or definition which isn't at the top level or similar + // looking expressions (e.g. an @selector() expression). + unsigned ParenCount = 0; + unsigned BraceCount = 0; + unsigned NumArgs = Selector->getNumArgs(); + + std::vector<Range> SelectorPieces; + auto Tokens = syntax::tokenize(SM.getMainFileID(), SM, LangOpts); + unsigned Last = Tokens.size() - 1; + for (unsigned Index = 0; Index < Last; ++Index) { + const auto &Tok = Tokens[Index]; + ---------------- kadircet wrote:
Sorry, I am still having a lot of trouble following this logic. I don't really see why we need all this complicated top-levelness handling in both places and can't just try to find relevant identifiers in the code, while skipping anything inside the braces, as we were discussing last time. What about: ``` for (unsigned Index = 0; Index < Last - 1; ++Index) { const auto &Tok = Tokens[Index]; // Keep searching for selector(0) to begin a match. if (!isMatchingSelectorName(Tok, Tokens[Index + 1], SM, Selector->getNameForSlot(0))) continue; // We found a candidate for our match, this might be a method call, declaration, or unrelated identifier e.g.: // - [obj ^sel0: X sel1: Y ... ] // or // @interface Foo // - int ^sel0: X sel1: Y ... // @end // Check if we can find all the relevant selector pieces starting from this token auto SelectorPieces = findAllSelectorPieces(Tokens.slice(Index), Selector); if (SelectorPieces) Ranges.emplace_back(std::move(*SelectorPieces)); } ``` ``` // Searches for all the fragments of the selector starting from Tokens[0]. Returns relevant ranges for those // if all could be found. llvm::Optional<SymbolRange> findAllSelectorPieces(llvm::ArrayRef<Token> Tokens, Selector, SM) { SymbolRange Result; auto CurMatch = 0; // Consumes all the tokens from the beginning of Tokens until current selector fragment is found. // It only matches at top-level, i.e. ignores any nested calls/exprs. auto SkipUntil = [&] { std::stack<char> Parens; for(Index = 0; Index < Tokens.size() - 1; ++Index) { auto &Tok = Tokens[Index]; auto &Next = Tokens[Index+1]; switch (Tok.kind()) { case l_paren: case l_square: case l_brace: Parens.push(Tok.text(SM)[0]); break; case r_paren: case r_square: case r_brace: if (Parens.top() != GetOpenning(Tok.text(SM)[0])) { // Invalid code, just error-out. Tokens = {}; CurMatch = 0; return; } Parens.pop(); break; case identifier: // Ignore any nested tokens. if(!Parens.empty()) break; // Found current segment!
if(isMatchingSelectorName(Tok, Next, SM, Selector(CurMatch))) return; default: break; } } }; while(CurMatch != Selector->getNumArgs() && !Tokens.empty()) { SkipUntil(); // We need at least a selector name and `:`. if(Tokens.size() < 2) return nullopt; Result.Ranges.push_back(tokenRangeForLoc(SM, Tokens.front(), ...); // Drop current selector fragment, keep searching for the rest; Tokens = Tokens.drop_front(2); ++CurMatch; } if (CurMatch == Selector->getNumArgs()) return Result; return std::nullopt; } ``` https://github.com/llvm/llvm-project/pull/76466 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits