eugene updated this revision to Diff 93572. eugene added a comment. Addressing code-review comments. Most notable change: MethodName::Parse() tries simple version of name parser, before invoking full power of CPlusPlusNameParser. It really helps with the perf.
https://reviews.llvm.org/D31451 Files: source/Plugins/Language/CPlusPlus/CMakeLists.txt source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp
Index: unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp =================================================================== --- unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp +++ unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp @@ -6,35 +6,139 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// - #include "gtest/gtest.h" #include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h" using namespace lldb_private; -TEST(CPlusPlusLanguage, MethodName) { +TEST(CPlusPlusLanguage, MethodNameParsing) { struct TestCase { std::string input; std::string context, basename, arguments, qualifiers, scope_qualified_name; }; TestCase test_cases[] = { - {"foo::bar(baz)", "foo", "bar", "(baz)", "", "foo::bar"}, + {"main(int, char *[]) ", "", "main", "(int, char *[])", "", "main"}, + {"foo::bar(baz) const", "foo", "bar", "(baz)", "const", "foo::bar"}, + {"foo::~bar(baz)", "foo", "~bar", "(baz)", "", "foo::~bar"}, + {"a::b::c::d(e,f)", "a::b::c", "d", "(e,f)", "", "a::b::c::d"}, + {"void f(int)", "", "f", "(int)", "", "f"}, + + // Operators {"std::basic_ostream<char, std::char_traits<char> >& " "std::operator<<<std::char_traits<char> >" "(std::basic_ostream<char, std::char_traits<char> >&, char const*)", "std", "operator<<<std::char_traits<char> >", "(std::basic_ostream<char, std::char_traits<char> >&, char const*)", "", - "std::operator<<<std::char_traits<char> >"}}; + "std::operator<<<std::char_traits<char> >"}, + {"operator delete[](void*, clang::ASTContext const&, unsigned long)", "", + "operator delete[]", "(void*, clang::ASTContext const&, unsigned long)", + "", "operator delete[]"}, + {"llvm::Optional<clang::PostInitializer>::operator bool() const", + "llvm::Optional<clang::PostInitializer>", "operator bool", "()", "const", + "llvm::Optional<clang::PostInitializer>::operator bool"}, + {"(anonymous namespace)::FactManager::operator[](unsigned short)", + "(anonymous namespace)::FactManager", "operator[]", "(unsigned short)", + "", "(anonymous namespace)::FactManager::operator[]"}, + {"const int& std::map<int, pair<short, int>>::operator[](short) const", + "std::map<int, pair<short, int>>", "operator[]", "(short)", "const", + "std::map<int, pair<short, int>>::operator[]"}, + {"CompareInsn::operator()(llvm::StringRef, InsnMatchEntry const&)", + "CompareInsn", "operator()", "(llvm::StringRef, InsnMatchEntry const&)", + "", "CompareInsn::operator()"}, + {"llvm::Optional<llvm::MCFixupKind>::operator*() const &", + "llvm::Optional<llvm::MCFixupKind>", "operator*", "()", "const &", + "llvm::Optional<llvm::MCFixupKind>::operator*"}, + // Internal classes + {"operator<<(Cls, Cls)::Subclass::function()", + "operator<<(Cls, Cls)::Subclass", "function", "()", "", + "operator<<(Cls, Cls)::Subclass::function"}, + {"SAEC::checkFunction(context&) const::CallBack::CallBack(int)", + "SAEC::checkFunction(context&) const::CallBack", "CallBack", "(int)", "", + "SAEC::checkFunction(context&) const::CallBack::CallBack"}, + // Anonymous namespace + {"XX::(anonymous namespace)::anon_class::anon_func() const", + "XX::(anonymous namespace)::anon_class", "anon_func", "()", "const", + "XX::(anonymous namespace)::anon_class::anon_func"}, + + // Function pointers + {"string (*f(vector<int>&&))(float)", "", "f", "(vector<int>&&)", "", + "f"}, + {"void (*&std::_Any_data::_M_access<void (*)()>())()", "std::_Any_data", + "_M_access<void (*)()>", "()", "", + "std::_Any_data::_M_access<void (*)()>"}, + {"void (*(*(*(*(*(*(*(* const&func1(int))())())())())())())())()", "", + "func1", "(int)", "", "func1"}, + + // Templates + {"void llvm::PM<llvm::Module, llvm::AM<llvm::Module>>::" + "addPass<llvm::VP>(llvm::VP)", + "llvm::PM<llvm::Module, llvm::AM<llvm::Module>>", "addPass<llvm::VP>", + "(llvm::VP)", "", + "llvm::PM<llvm::Module, llvm::AM<llvm::Module>>::" + "addPass<llvm::VP>"}, + {"void std::vector<Class, std::allocator<Class> >" + "::_M_emplace_back_aux<Class const&>(Class const&)", + "std::vector<Class, std::allocator<Class> >", + "_M_emplace_back_aux<Class const&>", "(Class const&)", "", + "std::vector<Class, std::allocator<Class> >::" + "_M_emplace_back_aux<Class const&>"}, + {"unsigned long llvm::countTrailingOnes<unsigned int>" + "(unsigned int, llvm::ZeroBehavior)", + "llvm", "countTrailingOnes<unsigned int>", + "(unsigned int, llvm::ZeroBehavior)", "", + "llvm::countTrailingOnes<unsigned int>"}, + {"std::enable_if<(10u)<(64), bool>::type llvm::isUInt<10u>(unsigned " + "long)", + "llvm", "isUInt<10u>", "(unsigned long)", "", "llvm::isUInt<10u>"}, + {"f<A<operator<(X,Y)::Subclass>, sizeof(B)<sizeof(C)>()", "", + "f<A<operator<(X,Y)::Subclass>, sizeof(B)<sizeof(C)>", "()", "", + "f<A<operator<(X,Y)::Subclass>, sizeof(B)<sizeof(C)>"}}; for (const auto &test : test_cases) { CPlusPlusLanguage::MethodName method(ConstString(test.input)); - EXPECT_TRUE(method.IsValid()); - EXPECT_EQ(test.context, method.GetContext()); - EXPECT_EQ(test.basename, method.GetBasename()); - EXPECT_EQ(test.arguments, method.GetArguments()); - EXPECT_EQ(test.qualifiers, method.GetQualifiers()); - EXPECT_EQ(test.scope_qualified_name, method.GetScopeQualifiedName()); + EXPECT_TRUE(method.IsValid()) << test.input; + if (method.IsValid()) { + EXPECT_EQ(test.context, method.GetContext().str()); + EXPECT_EQ(test.basename, method.GetBasename().str()); + EXPECT_EQ(test.arguments, method.GetArguments().str()); + EXPECT_EQ(test.qualifiers, method.GetQualifiers().str()); + EXPECT_EQ(test.scope_qualified_name, method.GetScopeQualifiedName()); + } } } + +TEST(CPlusPlusLanguage, ExtractContextAndIdentifier) { + struct TestCase { + std::string input; + std::string context, basename; + }; + + TestCase test_cases[] = { + {"main", "", "main"}, + {"foo01::bar", "foo01", "bar"}, + {"foo::~bar", "foo", "~bar"}, + {"std::vector<int>::push_back", "std::vector<int>", "push_back"}, + {"operator<<(Cls, Cls)::Subclass::function", + "operator<<(Cls, Cls)::Subclass", "function"}, + {"std::vector<Class, std::allocator<Class>>" + "::_M_emplace_back_aux<Class const&>", + "std::vector<Class, std::allocator<Class>>", + "_M_emplace_back_aux<Class const&>"}}; + + llvm::StringRef context, basename; + for (const auto &test : test_cases) { + EXPECT_TRUE(CPlusPlusLanguage::ExtractContextAndIdentifier( + test.input.c_str(), context, basename)); + EXPECT_EQ(test.context, context.str()); + EXPECT_EQ(test.basename, basename.str()); + } + + EXPECT_FALSE(CPlusPlusLanguage::ExtractContextAndIdentifier("void", context, + basename)); + EXPECT_FALSE( + CPlusPlusLanguage::ExtractContextAndIdentifier("321", context, basename)); + EXPECT_FALSE( + CPlusPlusLanguage::ExtractContextAndIdentifier("", context, basename)); +} Index: source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h =================================================================== --- /dev/null +++ source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h @@ -0,0 +1,173 @@ +//===-- CPlusPlusNameParser.h -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef liblldb_CPlusPlusNameParser_h_ +#define liblldb_CPlusPlusNameParser_h_ + +// C Includes +// C++ Includes + +// Other libraries and framework includes +#include "clang/Lex/Lexer.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +// Project includes +#include "lldb/Utility/ConstString.h" +#include "lldb/lldb-private.h" + +namespace lldb_private { + +// Helps to validate and obtain various parts of C++ definitions. +class CPlusPlusNameParser { +public: + CPlusPlusNameParser(llvm::StringRef text) : m_text(text) { ExtractTokens(); } + + struct ParsedName { + llvm::StringRef basename; + llvm::StringRef context; + }; + + struct ParsedFunction { + ParsedName name; + llvm::StringRef arguments; + llvm::StringRef qualifiers; + }; + + // Treats given text as a function definition and parses it. + // Function definition might or might not have a return type and this should + // change parsing result. + // Examples: + // main(int, chat const*) + // T fun(int, bool) + // std::vector<int>::push_back(int) + // int& map<int, pair<short, int>>::operator[](short) const + // int (*get_function(const chat *))() + llvm::Optional<ParsedFunction> ParseAsFunctionDefinition(); + + // Treats given text as a potentially nested name of C++ entity (function, + // class, field) and parses it. + // Examples: + // main + // fun + // std::vector<int>::push_back + // map<int, pair<short, int>>::operator[] + // func<C>(int, C&)::nested_class::method + llvm::Optional<ParsedName> ParseAsFullName(); + +private: + // A C++ definition to parse. + llvm::StringRef m_text; + // Tokens extracted from m_text. + llvm::SmallVector<clang::Token, 30> m_tokens; + // Index of the next token to look at from m_tokens. + int m_next_token_index = 0; + + // Range of tokens saved in m_next_token_index. + struct Range { + int m_begin_index = 0; + int m_end_index = 0; + + Range() {} + + Range(int begin, int end) : m_begin_index(begin), m_end_index(end) {} + + int size() const { return m_end_index - m_begin_index; } + + bool empty() const { return size() == 0; } + }; + + struct ParsedNameRanges { + Range basename_range; + Range context_range; + }; + + // Bookmark automatically restores parsing position (m_next_token_index) + // when destructed unless it's manually removed with Remove(). + class Bookmark { + public: + Bookmark(int &position) + : m_position(position), m_position_value(position) {} + Bookmark(Bookmark &) = delete; + Bookmark(Bookmark &&b) + : m_position(b.m_position), m_position_value(b.m_position_value), + m_restore(b.m_restore) { + b.Remove(); + } + + void Remove() { m_restore = false; } + int GetSavedPosition() { return m_position_value; } + ~Bookmark() { + if (m_restore) { + m_position = m_position_value; + } + } + + private: + int &m_position; + int m_position_value; + bool m_restore = true; + }; + + bool HasMoreTokens(); + void Advance(); + void TakeBack(); + bool ConsumeToken(clang::tok::TokenKind kind); + template <typename... Ts> bool ConsumeToken(Ts... kinds); + Bookmark SetBookmark(); + int GetCurrentPosition(); + clang::Token &Peek(); + bool ConsumeBrackets(clang::tok::TokenKind left, clang::tok::TokenKind right); + + llvm::Optional<ParsedFunction> ParseFunctionImpl(bool expect_return_type); + + // Parses functions returning function pointers 'string (*f(int x))(float y)' + llvm::Optional<ParsedFunction> ParseFuncPtr(bool expect_return_type); + + // Consumes function arguments enclosed within '(' ... ')' + bool ConsumeArguments(); + + // Consumes template arguments enclosed within '<' ... '>' + bool ConsumeTemplateArgs(); + + // Consumes '(anonymous namespace)' + bool ConsumeAnonymousNamespace(); + + // Consumes operator declaration like 'operator *' or 'operator delete []' + bool ConsumeOperator(); + + // Skips 'const' and 'volatile' + void SkipTypeQualifiers(); + + // Skips 'const', 'volatile', '&', '&&' in the end of the function. + void SkipFunctionQualifiers(); + + // Consumes built-in types like 'int' or 'unsigned long long int' + bool ConsumeBuiltinType(); + + // Skips 'const' and 'volatile' + void SkipPtrsAndRefs(); + + // Consumes things like 'const * const &' + bool ConsumePtrsAndRefs(); + + // Consumes full type name like 'Namespace::Class<int>::Method()::InnerClass' + bool ConsumeTypename(); + + llvm::Optional<ParsedNameRanges> ParseFullNameImpl(); + llvm::StringRef GetTextForRange(const Range &range); + + // Populate m_tokens by calling clang lexer on m_text. + void ExtractTokens(); +}; + +} // namespace lldb_private + +#endif // liblldb_CPlusPlusNameParser_h_ Index: source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp =================================================================== --- /dev/null +++ source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp @@ -0,0 +1,615 @@ +//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "CPlusPlusNameParser.h" + +#include "clang/Basic/IdentifierTable.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/Threading.h" + +using namespace lldb; +using namespace lldb_private; +using llvm::Optional; +using llvm::None; +using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction; +using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName; +namespace tok = clang::tok; + +llvm::Optional<ParsedFunction> +CPlusPlusNameParser::ParseAsFunctionDefinition() { + m_next_token_index = 0; + llvm::Optional<ParsedFunction> result(None); + + // Try to parse the name as function without a return type specified + // e.g. main(int, char*[]) + { + Bookmark start_position = SetBookmark(); + result = ParseFunctionImpl(false); + if (result && !HasMoreTokens()) + return result; + } + + // Try to parse the name as function with function pointer return type + // e.g. void (*get_func(const char*))() + result = ParseFuncPtr(true); + if (result) + return result; + + // Finally try to parse the name as a function with non-function return type + // e.g. int main(int, char*[]) + result = ParseFunctionImpl(true); + return result; +} + +llvm::Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() { + m_next_token_index = 0; + llvm::Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl(); + if (!name_ranges) + return None; + ParsedName result; + result.basename = GetTextForRange(name_ranges.getValue().basename_range); + result.context = GetTextForRange(name_ranges.getValue().context_range); + return result; +} + +bool CPlusPlusNameParser::HasMoreTokens() { + return m_next_token_index < static_cast<int>(m_tokens.size()); +} + +void CPlusPlusNameParser::Advance() { ++m_next_token_index; } + +void CPlusPlusNameParser::TakeBack() { --m_next_token_index; } + +bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) { + if (!HasMoreTokens()) + return false; + + if (!Peek().is(kind)) + return false; + + Advance(); + return true; +} + +template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) { + if (!HasMoreTokens()) + return false; + + if (!Peek().isOneOf(kinds...)) + return false; + + Advance(); + return true; +} + +CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() { + return Bookmark(m_next_token_index); +} + +int CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; } + +clang::Token &CPlusPlusNameParser::Peek() { + assert(HasMoreTokens()); + return m_tokens[m_next_token_index]; +} + +llvm::Optional<ParsedFunction> +CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) { + Bookmark start_position = SetBookmark(); + if (expect_return_type) { + // Consume return type if it's expected. + if (!ConsumeTypename()) + return None; + } + + auto maybe_name = ParseFullNameImpl(); + if (!maybe_name) { + return None; + } + + int argument_start = GetCurrentPosition(); + if (!ConsumeArguments()) { + return None; + } + + int qualifiers_start = GetCurrentPosition(); + SkipFunctionQualifiers(); + int end_position = GetCurrentPosition(); + + ParsedFunction result; + result.name.basename = GetTextForRange(maybe_name.getValue().basename_range); + result.name.context = GetTextForRange(maybe_name.getValue().context_range); + result.arguments = GetTextForRange(Range(argument_start, qualifiers_start)); + result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position)); + start_position.Remove(); + return result; +} + +llvm::Optional<ParsedFunction> +CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) { + Bookmark start_position = SetBookmark(); + if (expect_return_type) { + // Consume return type. + if (!ConsumeTypename()) + return None; + } + + if (!ConsumeToken(tok::l_paren)) + return None; + if (!ConsumePtrsAndRefs()) + return None; + + { + Bookmark before_inner_function_pos = SetBookmark(); + auto maybe_inner_function_name = ParseFunctionImpl(false); + if (maybe_inner_function_name) + if (ConsumeToken(tok::r_paren)) + if (ConsumeArguments()) { + SkipFunctionQualifiers(); + start_position.Remove(); + before_inner_function_pos.Remove(); + return maybe_inner_function_name; + } + } + + auto maybe_inner_function_ptr_name = ParseFuncPtr(false); + if (maybe_inner_function_ptr_name) + if (ConsumeToken(tok::r_paren)) + if (ConsumeArguments()) { + SkipFunctionQualifiers(); + start_position.Remove(); + return maybe_inner_function_ptr_name; + } + return None; +} + +bool CPlusPlusNameParser::ConsumeArguments() { + return ConsumeBrackets(tok::l_paren, tok::r_paren); +} + +bool CPlusPlusNameParser::ConsumeTemplateArgs() { + Bookmark start_position = SetBookmark(); + if (!HasMoreTokens() || Peek().getKind() != tok::less) + return false; + Advance(); + + // Consuming template arguments is a bit trickier than consuming function + // arguments, because '<' '>' brackets are not always trivially balanced. + // In some rare cases tokens '<' and '>' can appear inside template arguments + // as arithmetic or shift operators not as template brackets. + // Examples: std::enable_if<(10u)<(64), bool> + // f<A<operator<(X,Y)::Subclass>> + // Good thing that compiler makes sure that really ambiguous cases of + // '>' usage should be enclosed within '()' brackets. + int template_counter = 1; + bool can_open_template = false; + while (HasMoreTokens() && template_counter > 0) { + tok::TokenKind kind = Peek().getKind(); + switch (kind) { + case tok::greatergreater: + template_counter -= 2; + can_open_template = false; + Advance(); + break; + case tok::greater: + --template_counter; + can_open_template = false; + Advance(); + break; + case tok::less: + // '<' is an attempt to open a subteamplte + // check if parser is at the point where it's actually possible, + // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. + // No need to do the same for '>' because compiler actually makes sure + // that '>' always surrounded by brackets to avoid ambiguity. + if (can_open_template) + ++template_counter; + can_open_template = false; + Advance(); + break; + case tok::kw_operator: // C++ operator overloading. + if (!ConsumeOperator()) + return false; + can_open_template = true; + break; + case tok::raw_identifier: + can_open_template = true; + Advance(); + break; + case tok::l_square: + if (!ConsumeBrackets(tok::l_square, tok::r_square)) + return false; + can_open_template = false; + break; + case tok::l_paren: + if (!ConsumeArguments()) + return false; + can_open_template = false; + break; + default: + can_open_template = false; + Advance(); + break; + } + } + + assert(template_counter >= 0); + if (template_counter > 0) { + return false; + } + start_position.Remove(); + return true; +} + +bool CPlusPlusNameParser::ConsumeAnonymousNamespace() { + Bookmark start_position = SetBookmark(); + if (!ConsumeToken(tok::l_paren)) { + return false; + } + static ConstString g_anonymous("anonymous"); + if (HasMoreTokens() && Peek().is(tok::raw_identifier) && + Peek().getRawIdentifier() == g_anonymous.GetStringRef()) { + Advance(); + } else { + return false; + } + + if (!ConsumeToken(tok::kw_namespace)) { + return false; + } + + if (!ConsumeToken(tok::r_paren)) { + return false; + } + start_position.Remove(); + return true; +} + +bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left, + tok::TokenKind right) { + Bookmark start_position = SetBookmark(); + if (!HasMoreTokens() || Peek().getKind() != left) + return false; + Advance(); + + int counter = 1; + while (HasMoreTokens() && counter > 0) { + tok::TokenKind kind = Peek().getKind(); + if (kind == right) + --counter; + else if (kind == left) + ++counter; + Advance(); + } + + assert(counter >= 0); + if (counter > 0) { + return false; + } + start_position.Remove(); + return true; +} + +bool CPlusPlusNameParser::ConsumeOperator() { + Bookmark start_position = SetBookmark(); + if (!ConsumeToken(tok::kw_operator)) + return false; + + if (!HasMoreTokens()) { + return false; + } + + const auto &token = Peek(); + switch (token.getKind()) { + case tok::kw_new: + case tok::kw_delete: + // This is 'new' or 'delete' operators. + Advance(); + // Check for array new/delete. + if (HasMoreTokens() && Peek().is(tok::l_square)) { + // Consume the '[' and ']'. + if (!ConsumeBrackets(tok::l_square, tok::r_square)) + return false; + } + break; + +#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \ + case tok::Token: \ + Advance(); \ + break; +#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly) +#include "clang/Basic/OperatorKinds.def" +#undef OVERLOADED_OPERATOR +#undef OVERLOADED_OPERATOR_MULTI + + case tok::l_paren: + // Call operator consume '(' ... ')'. + if (ConsumeBrackets(tok::l_paren, tok::r_paren)) + break; + return false; + + case tok::l_square: + // This is a [] operator. + // Consume the '[' and ']'. + if (ConsumeBrackets(tok::l_square, tok::r_square)) + break; + return false; + + default: + // This might be a cast operator. + if (ConsumeTypename()) + break; + return false; + } + start_position.Remove(); + return true; +} + +void CPlusPlusNameParser::SkipTypeQualifiers() { + while (ConsumeToken(tok::kw_const, tok::kw_volatile)) + ; +} + +void CPlusPlusNameParser::SkipFunctionQualifiers() { + while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp)) + ; +} + +bool CPlusPlusNameParser::ConsumeBuiltinType() { + bool result = false; + bool continue_parsing = true; + // Built-in types can be made of a few keywords + // like 'unsigned long long int'. This function + // consumes all built-in type keywords without + // checking if they make sense like 'unsigned char void'. + while (continue_parsing && HasMoreTokens()) { + switch (Peek().getKind()) { + case tok::kw_short: + case tok::kw_long: + case tok::kw___int64: + case tok::kw___int128: + case tok::kw_signed: + case tok::kw_unsigned: + case tok::kw_void: + case tok::kw_char: + case tok::kw_int: + case tok::kw_half: + case tok::kw_float: + case tok::kw_double: + case tok::kw___float128: + case tok::kw_wchar_t: + case tok::kw_bool: + case tok::kw_char16_t: + case tok::kw_char32_t: + result = true; + Advance(); + break; + default: + continue_parsing = false; + break; + } + } + return result; +} + +void CPlusPlusNameParser::SkipPtrsAndRefs() { + // Ignoring result. + ConsumePtrsAndRefs(); +} + +bool CPlusPlusNameParser::ConsumePtrsAndRefs() { + bool found = false; + SkipTypeQualifiers(); + while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const, + tok::kw_volatile)) { + found = true; + SkipTypeQualifiers(); + } + return found; +} + +bool CPlusPlusNameParser::ConsumeTypename() { + Bookmark start_position = SetBookmark(); + SkipTypeQualifiers(); + if (!ConsumeBuiltinType()) { + if (!ParseFullNameImpl()) + return false; + } + SkipPtrsAndRefs(); + start_position.Remove(); + return true; +} + +llvm::Optional<CPlusPlusNameParser::ParsedNameRanges> +CPlusPlusNameParser::ParseFullNameImpl() { + // Name parsing state machine. + enum class State { + Beginning, // start of the name + AfterTwoColons, // right after :: + AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+) + AfterTemplate, // right after template brackets (<something>) + AfterOperator, // right after name of C++ operator + }; + + Bookmark start_position = SetBookmark(); + State state = State::Beginning; + bool continue_parsing = true; + int last_coloncolon_position = -1; + + while (continue_parsing && HasMoreTokens()) { + const auto &token = Peek(); + switch (token.getKind()) { + case tok::raw_identifier: // Just a name. + if (state != State::Beginning && state != State::AfterTwoColons) { + continue_parsing = false; + break; + } + Advance(); + state = State::AfterIdentifier; + break; + case tok::l_paren: { + if (state == State::Beginning || state == State::AfterTwoColons) { + // (anonymous namespace) + if (ConsumeAnonymousNamespace()) { + state = State::AfterIdentifier; + break; + } + } + + // Type declared inside a function 'func()::Type' + if (state != State::AfterIdentifier && state != State::AfterTemplate && + state != State::AfterOperator) { + continue_parsing = false; + break; + } + Bookmark l_paren_position = SetBookmark(); + // Consume the '(' ... ') [const]'. + if (!ConsumeArguments()) { + continue_parsing = false; + break; + } + SkipFunctionQualifiers(); + + // Consume '::' + int coloncolon_position = GetCurrentPosition(); + if (!ConsumeToken(tok::coloncolon)) { + continue_parsing = false; + break; + } + l_paren_position.Remove(); + last_coloncolon_position = coloncolon_position; + state = State::AfterTwoColons; + break; + } + case tok::coloncolon: // Type nesting delimiter. + if (state != State::Beginning && state != State::AfterIdentifier && + state != State::AfterTemplate) { + continue_parsing = false; + break; + } + last_coloncolon_position = GetCurrentPosition(); + Advance(); + state = State::AfterTwoColons; + break; + case tok::less: // Template brackets. + if (state != State::AfterIdentifier && state != State::AfterOperator) { + continue_parsing = false; + break; + } + if (!ConsumeTemplateArgs()) { + continue_parsing = false; + break; + } + state = State::AfterTemplate; + break; + case tok::kw_operator: // C++ operator overloading. + if (state != State::Beginning && state != State::AfterTwoColons) { + continue_parsing = false; + break; + } + if (!ConsumeOperator()) { + continue_parsing = false; + break; + } + state = State::AfterOperator; + break; + case tok::tilde: // Destructor. + if (state != State::Beginning && state != State::AfterTwoColons) { + continue_parsing = false; + break; + } + Advance(); + if (ConsumeToken(tok::raw_identifier)) { + state = State::AfterIdentifier; + } else { + TakeBack(); + continue_parsing = false; + } + break; + default: + continue_parsing = false; + break; + } + } + + if (state == State::AfterIdentifier || state == State::AfterOperator || + state == State::AfterTemplate) { + ParsedNameRanges result; + if (last_coloncolon_position != -1) { + result.context_range = + Range(start_position.GetSavedPosition(), last_coloncolon_position); + result.basename_range = + Range(last_coloncolon_position + 1, GetCurrentPosition()); + } else { + result.basename_range = + Range(start_position.GetSavedPosition(), GetCurrentPosition()); + } + start_position.Remove(); + return result; + } else { + return None; + } +} + +llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) { + if (range.empty()) + return llvm::StringRef(); + assert(range.m_begin_index < range.m_end_index); + assert(range.m_begin_index < (int)m_tokens.size()); + assert(range.m_end_index <= (int)m_tokens.size()); + clang::Token &first_token = m_tokens[range.m_begin_index]; + clang::Token &last_token = m_tokens[range.m_end_index - 1]; + clang::SourceLocation start_loc = first_token.getLocation(); + clang::SourceLocation end_loc = last_token.getLocation(); + unsigned start_pos = start_loc.getRawEncoding(); + unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength(); + return m_text.take_front(end_pos).drop_front(start_pos); +} + +static const clang::LangOptions &GetLangOptions() { + static clang::LangOptions g_options; + static llvm::once_flag g_once_flag; + llvm::call_once(g_once_flag, []() { + g_options.LineComment = true; + g_options.C99 = true; + g_options.C11 = true; + g_options.CPlusPlus = true; + g_options.CPlusPlus11 = true; + g_options.CPlusPlus14 = true; + g_options.CPlusPlus1z = true; + }); + return g_options; +} + +static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() { + static llvm::StringMap<tok::TokenKind> g_map{ +#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name}, +#include "clang/Basic/TokenKinds.def" +#undef KEYWORD + }; + return g_map; +} + +void CPlusPlusNameParser::ExtractTokens() { + clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(), + m_text.data(), m_text.data() + m_text.size()); + const auto &kw_map = GetKeywordsMap(); + clang::Token token; + for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof); + lexer.LexFromRawLexer(token)) { + if (token.is(clang::tok::raw_identifier)) { + auto it = kw_map.find(token.getRawIdentifier()); + if (it != kw_map.end()) { + token.setKind(it->getValue()); + } + } + + m_tokens.push_back(token); + } +} Index: source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h =================================================================== --- source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -29,35 +29,24 @@ public: class MethodName { public: - enum Type { - eTypeInvalid, - eTypeUnknownMethod, - eTypeClassMethod, - eTypeInstanceMethod - }; - MethodName() : m_full(), m_basename(), m_context(), m_arguments(), m_qualifiers(), - m_type(eTypeInvalid), m_parsed(false), m_parse_error(false) {} + m_parsed(false), m_parse_error(false) {} MethodName(const ConstString &s) : m_full(s), m_basename(), m_context(), m_arguments(), m_qualifiers(), - m_type(eTypeInvalid), m_parsed(false), m_parse_error(false) {} + m_parsed(false), m_parse_error(false) {} void Clear(); bool IsValid() { if (!m_parsed) Parse(); if (m_parse_error) return false; - if (m_type == eTypeInvalid) - return false; return (bool)m_full; } - Type GetType() const { return m_type; } - const ConstString &GetFullName() const { return m_full; } std::string GetScopeQualifiedName(); @@ -72,15 +61,15 @@ protected: void Parse(); + bool TrySimplifiedParse(); ConstString m_full; // Full name: // "lldb::SBTarget::GetBreakpointAtIndex(unsigned int) // const" llvm::StringRef m_basename; // Basename: "GetBreakpointAtIndex" llvm::StringRef m_context; // Decl context: "lldb::SBTarget" llvm::StringRef m_arguments; // Arguments: "(unsigned int)" llvm::StringRef m_qualifiers; // Qualifiers: "const" - Type m_type; bool m_parsed; bool m_parse_error; }; @@ -121,7 +110,7 @@ // If the name is a lone C identifier (e.g. C) or a qualified C identifier // (e.g. A::B::C) it will return true, // and identifier will be the identifier (C and C respectively) and the - // context will be "" and "A::B::" respectively. + // context will be "" and "A::B" respectively. // If the name fails the heuristic matching for a qualified or unqualified // C/C++ identifier, then it will return false // and identifier and context will be unchanged. Index: source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp =================================================================== --- source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp +++ source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp @@ -21,7 +21,6 @@ // Other libraries and framework includes #include "llvm/ADT/StringRef.h" -#include "llvm/Support/Threading.h" // Project includes #include "lldb/Core/PluginManager.h" @@ -36,6 +35,7 @@ #include "lldb/Utility/RegularExpression.h" #include "BlockPointer.h" +#include "CPlusPlusNameParser.h" #include "CxxStringTypes.h" #include "LibCxx.h" #include "LibCxxAtomic.h" @@ -85,15 +85,14 @@ m_context = llvm::StringRef(); m_arguments = llvm::StringRef(); m_qualifiers = llvm::StringRef(); - m_type = eTypeInvalid; m_parsed = false; m_parse_error = false; } -bool ReverseFindMatchingChars(const llvm::StringRef &s, - const llvm::StringRef &left_right_chars, - size_t &left_pos, size_t &right_pos, - size_t pos = llvm::StringRef::npos) { +static bool ReverseFindMatchingChars(const llvm::StringRef &s, + const llvm::StringRef &left_right_chars, + size_t &left_pos, size_t &right_pos, + size_t pos = llvm::StringRef::npos) { assert(left_right_chars.size() == 2); left_pos = llvm::StringRef::npos; const char left_char = left_right_chars[0]; @@ -119,10 +118,9 @@ return false; } -static bool IsValidBasename(const llvm::StringRef &basename) { - // Check that the basename matches with the following regular expression or is - // an operator name: - // "^~?([A-Za-z_][A-Za-z_0-9]*)(<.*>)?$" +static bool IsTrivialBasename(const llvm::StringRef &basename) { + // Check that the basename matches with the following regular expression + // "^~?([A-Za-z_][A-Za-z_0-9]*)$" // We are using a hand written implementation because it is significantly more // efficient then // using the general purpose regular expression library. @@ -149,100 +147,69 @@ if (idx == basename.size()) return true; - // Check for basename with template arguments - // TODO: Improve the quality of the validation with validating the template - // arguments - if (basename[idx] == '<' && basename.back() == '>') - return true; + return false; +} - // Check if the basename is a vaild C++ operator name - if (!basename.startswith("operator")) - return false; +bool CPlusPlusLanguage::MethodName::TrySimplifiedParse() { + // This method tries to parse simple method definitions + // which are presumably most comman in user programs. + // Definitions that can be parsed by this function don't have return types + // and templates in the name. + // A::B::C::fun(std::vector<T> &) const + size_t arg_start, arg_end; + llvm::StringRef full(m_full.GetCString()); + llvm::StringRef parens("()", 2); + if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) { + m_arguments = full.substr(arg_start, arg_end - arg_start + 1); + if (arg_end + 1 < full.size()) + m_qualifiers = full.substr(arg_end + 1).ltrim(); + + if (arg_start == 0) + return false; + size_t basename_end = arg_start; + size_t context_start = 0; + size_t context_end = full.rfind(':', basename_end); + if (context_end == llvm::StringRef::npos) + m_basename = full.substr(0, basename_end); + else { + if (context_start < context_end) + m_context = full.substr(context_start, context_end - 1 - context_start); + const size_t basename_begin = context_end + 1; + m_basename = full.substr(basename_begin, basename_end - basename_begin); + } - static RegularExpression g_operator_regex( - llvm::StringRef("^(operator)( " - "?)([A-Za-z_][A-Za-z_0-9]*|\\(\\)|" - "\\[\\]|[\\^<>=!\\/" - "*+-]+)(<.*>)?(\\[\\])?$")); - std::string basename_str(basename.str()); - return g_operator_regex.Execute(basename_str, nullptr); + if (IsTrivialBasename(m_basename)) { + return true; + } else { + // The C++ basename doesn't match our regular expressions so this can't + // be a valid C++ method, clear everything out and indicate an error + m_context = llvm::StringRef(); + m_basename = llvm::StringRef(); + m_arguments = llvm::StringRef(); + m_qualifiers = llvm::StringRef(); + return false; + } + } + return false; } void CPlusPlusLanguage::MethodName::Parse() { if (!m_parsed && m_full) { - // ConstString mangled; - // m_full.GetMangledCounterpart(mangled); - // printf ("\n parsing = '%s'\n", m_full.GetCString()); - // if (mangled) - // printf (" mangled = '%s'\n", mangled.GetCString()); - m_parse_error = false; - m_parsed = true; - llvm::StringRef full(m_full.GetCString()); - - size_t arg_start, arg_end; - llvm::StringRef parens("()", 2); - if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) { - m_arguments = full.substr(arg_start, arg_end - arg_start + 1); - if (arg_end + 1 < full.size()) - m_qualifiers = full.substr(arg_end + 1); - if (arg_start > 0) { - size_t basename_end = arg_start; - size_t context_start = 0; - size_t context_end = llvm::StringRef::npos; - if (basename_end > 0 && full[basename_end - 1] == '>') { - // TODO: handle template junk... - // Templated function - size_t template_start, template_end; - llvm::StringRef lt_gt("<>", 2); - if (ReverseFindMatchingChars(full, lt_gt, template_start, - template_end, basename_end)) { - // Check for templated functions that include return type like: - // 'void foo<Int>()' - context_start = full.rfind(' ', template_start); - if (context_start == llvm::StringRef::npos) - context_start = 0; - else - ++context_start; - - context_end = full.rfind(':', template_start); - if (context_end == llvm::StringRef::npos || - context_end < context_start) - context_end = context_start; - } else { - context_end = full.rfind(':', basename_end); - } - } else if (context_end == llvm::StringRef::npos) { - context_end = full.rfind(':', basename_end); - } - - if (context_end == llvm::StringRef::npos) - m_basename = full.substr(0, basename_end); - else { - if (context_start < context_end) - m_context = - full.substr(context_start, context_end - 1 - context_start); - const size_t basename_begin = context_end + 1; - m_basename = - full.substr(basename_begin, basename_end - basename_begin); - } - m_type = eTypeUnknownMethod; + if (TrySimplifiedParse()) { + m_parse_error = false; + } else { + CPlusPlusNameParser parser(m_full.GetStringRef()); + if (auto function = parser.ParseAsFunctionDefinition()) { + m_basename = function.getValue().name.basename; + m_context = function.getValue().name.context; + m_arguments = function.getValue().arguments; + m_qualifiers = function.getValue().qualifiers; + m_parse_error = false; } else { m_parse_error = true; - return; - } - - if (!IsValidBasename(m_basename)) { - // The C++ basename doesn't match our regular expressions so this can't - // be a valid C++ method, clear everything out and indicate an error - m_context = llvm::StringRef(); - m_basename = llvm::StringRef(); - m_arguments = llvm::StringRef(); - m_qualifiers = llvm::StringRef(); - m_parse_error = true; } - } else { - m_parse_error = true; } + m_parsed = true; } } @@ -273,14 +240,13 @@ std::string CPlusPlusLanguage::MethodName::GetScopeQualifiedName() { if (!m_parsed) Parse(); - if (m_basename.empty() || m_context.empty()) - return std::string(); + if (m_context.empty()) + return m_basename; std::string res; res += m_context; res += "::"; res += m_basename; - return res; } @@ -296,13 +262,10 @@ bool CPlusPlusLanguage::ExtractContextAndIdentifier( const char *name, llvm::StringRef &context, llvm::StringRef &identifier) { - static RegularExpression g_basename_regex(llvm::StringRef( - "^(([A-Za-z_][A-Za-z_0-9]*::)*)(~?[A-Za-z_~][A-Za-z_0-9]*)$")); - RegularExpression::Match match(4); - if (g_basename_regex.Execute(llvm::StringRef::withNullAsEmpty(name), - &match)) { - match.GetMatchAtIndex(name, 1, context); - match.GetMatchAtIndex(name, 3, identifier); + CPlusPlusNameParser parser(name); + if (auto full_name = parser.ParseAsFullName()) { + identifier = full_name.getValue().basename; + context = full_name.getValue().context; return true; } return false; Index: source/Plugins/Language/CPlusPlus/CMakeLists.txt =================================================================== --- source/Plugins/Language/CPlusPlus/CMakeLists.txt +++ source/Plugins/Language/CPlusPlus/CMakeLists.txt @@ -1,6 +1,7 @@ add_lldb_library(lldbPluginCPlusPlusLanguage PLUGIN BlockPointer.cpp CPlusPlusLanguage.cpp + CPlusPlusNameParser.cpp CxxStringTypes.cpp LibCxx.cpp LibCxxAtomic.cpp
_______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits