eugene updated this revision to Diff 93572.
eugene added a comment.

Addressed code-review comments.
The most notable change: MethodName::Parse() now tries a simplified version of
the name parser before invoking the full CPlusPlusNameParser, which noticeably
improves performance.


https://reviews.llvm.org/D31451

Files:
  source/Plugins/Language/CPlusPlus/CMakeLists.txt
  source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
  source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
  source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp
  source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h
  unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp

Index: unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp
===================================================================
--- unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp
+++ unittests/Language/CPlusPlus/CPlusPlusLanguageTest.cpp
@@ -6,35 +6,139 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-
 #include "gtest/gtest.h"
 
 #include "Plugins/Language/CPlusPlus/CPlusPlusLanguage.h"
 
 using namespace lldb_private;
 
-TEST(CPlusPlusLanguage, MethodName) {
+TEST(CPlusPlusLanguage, MethodNameParsing) {
   struct TestCase {
     std::string input;
     std::string context, basename, arguments, qualifiers, scope_qualified_name;
   };
 
   TestCase test_cases[] = {
-      {"foo::bar(baz)", "foo", "bar", "(baz)", "", "foo::bar"},
+      {"main(int, char *[]) ", "", "main", "(int, char *[])", "", "main"},
+      {"foo::bar(baz) const", "foo", "bar", "(baz)", "const", "foo::bar"},
+      {"foo::~bar(baz)", "foo", "~bar", "(baz)", "", "foo::~bar"},
+      {"a::b::c::d(e,f)", "a::b::c", "d", "(e,f)", "", "a::b::c::d"},
+      {"void f(int)", "", "f", "(int)", "", "f"},
+
+      // Operators
       {"std::basic_ostream<char, std::char_traits<char> >& "
        "std::operator<<<std::char_traits<char> >"
        "(std::basic_ostream<char, std::char_traits<char> >&, char const*)",
        "std", "operator<<<std::char_traits<char> >",
        "(std::basic_ostream<char, std::char_traits<char> >&, char const*)", "",
-       "std::operator<<<std::char_traits<char> >"}};
+       "std::operator<<<std::char_traits<char> >"},
+      {"operator delete[](void*, clang::ASTContext const&, unsigned long)", "",
+       "operator delete[]", "(void*, clang::ASTContext const&, unsigned long)",
+       "", "operator delete[]"},
+      {"llvm::Optional<clang::PostInitializer>::operator bool() const",
+       "llvm::Optional<clang::PostInitializer>", "operator bool", "()", "const",
+       "llvm::Optional<clang::PostInitializer>::operator bool"},
+      {"(anonymous namespace)::FactManager::operator[](unsigned short)",
+       "(anonymous namespace)::FactManager", "operator[]", "(unsigned short)",
+       "", "(anonymous namespace)::FactManager::operator[]"},
+      {"const int& std::map<int, pair<short, int>>::operator[](short) const",
+       "std::map<int, pair<short, int>>", "operator[]", "(short)", "const",
+       "std::map<int, pair<short, int>>::operator[]"},
+      {"CompareInsn::operator()(llvm::StringRef, InsnMatchEntry const&)",
+       "CompareInsn", "operator()", "(llvm::StringRef, InsnMatchEntry const&)",
+       "", "CompareInsn::operator()"},
+      {"llvm::Optional<llvm::MCFixupKind>::operator*() const &",
+       "llvm::Optional<llvm::MCFixupKind>", "operator*", "()", "const &",
+       "llvm::Optional<llvm::MCFixupKind>::operator*"},
+      // Internal classes
+      {"operator<<(Cls, Cls)::Subclass::function()",
+       "operator<<(Cls, Cls)::Subclass", "function", "()", "",
+       "operator<<(Cls, Cls)::Subclass::function"},
+      {"SAEC::checkFunction(context&) const::CallBack::CallBack(int)",
+       "SAEC::checkFunction(context&) const::CallBack", "CallBack", "(int)", "",
+       "SAEC::checkFunction(context&) const::CallBack::CallBack"},
+      // Anonymous namespace
+      {"XX::(anonymous namespace)::anon_class::anon_func() const",
+       "XX::(anonymous namespace)::anon_class", "anon_func", "()", "const",
+       "XX::(anonymous namespace)::anon_class::anon_func"},
+
+      // Function pointers
+      {"string (*f(vector<int>&&))(float)", "", "f", "(vector<int>&&)", "",
+       "f"},
+      {"void (*&std::_Any_data::_M_access<void (*)()>())()", "std::_Any_data",
+       "_M_access<void (*)()>", "()", "",
+       "std::_Any_data::_M_access<void (*)()>"},
+      {"void (*(*(*(*(*(*(*(* const&func1(int))())())())())())())())()", "",
+       "func1", "(int)", "", "func1"},
+
+      // Templates
+      {"void llvm::PM<llvm::Module, llvm::AM<llvm::Module>>::"
+       "addPass<llvm::VP>(llvm::VP)",
+       "llvm::PM<llvm::Module, llvm::AM<llvm::Module>>", "addPass<llvm::VP>",
+       "(llvm::VP)", "",
+       "llvm::PM<llvm::Module, llvm::AM<llvm::Module>>::"
+       "addPass<llvm::VP>"},
+      {"void std::vector<Class, std::allocator<Class> >"
+       "::_M_emplace_back_aux<Class const&>(Class const&)",
+       "std::vector<Class, std::allocator<Class> >",
+       "_M_emplace_back_aux<Class const&>", "(Class const&)", "",
+       "std::vector<Class, std::allocator<Class> >::"
+       "_M_emplace_back_aux<Class const&>"},
+      {"unsigned long llvm::countTrailingOnes<unsigned int>"
+       "(unsigned int, llvm::ZeroBehavior)",
+       "llvm", "countTrailingOnes<unsigned int>",
+       "(unsigned int, llvm::ZeroBehavior)", "",
+       "llvm::countTrailingOnes<unsigned int>"},
+      {"std::enable_if<(10u)<(64), bool>::type llvm::isUInt<10u>(unsigned "
+       "long)",
+       "llvm", "isUInt<10u>", "(unsigned long)", "", "llvm::isUInt<10u>"},
+      {"f<A<operator<(X,Y)::Subclass>, sizeof(B)<sizeof(C)>()", "",
+       "f<A<operator<(X,Y)::Subclass>, sizeof(B)<sizeof(C)>", "()", "",
+       "f<A<operator<(X,Y)::Subclass>, sizeof(B)<sizeof(C)>"}};
 
   for (const auto &test : test_cases) {
     CPlusPlusLanguage::MethodName method(ConstString(test.input));
-    EXPECT_TRUE(method.IsValid());
-    EXPECT_EQ(test.context, method.GetContext());
-    EXPECT_EQ(test.basename, method.GetBasename());
-    EXPECT_EQ(test.arguments, method.GetArguments());
-    EXPECT_EQ(test.qualifiers, method.GetQualifiers());
-    EXPECT_EQ(test.scope_qualified_name, method.GetScopeQualifiedName());
+    EXPECT_TRUE(method.IsValid()) << test.input;
+    if (method.IsValid()) {
+      EXPECT_EQ(test.context, method.GetContext().str());
+      EXPECT_EQ(test.basename, method.GetBasename().str());
+      EXPECT_EQ(test.arguments, method.GetArguments().str());
+      EXPECT_EQ(test.qualifiers, method.GetQualifiers().str());
+      EXPECT_EQ(test.scope_qualified_name, method.GetScopeQualifiedName());
+    }
   }
 }
+
+TEST(CPlusPlusLanguage, ExtractContextAndIdentifier) {
+  struct TestCase {
+    std::string input;
+    std::string context, basename;
+  };
+
+  TestCase test_cases[] = {
+      {"main", "", "main"},
+      {"foo01::bar", "foo01", "bar"},
+      {"foo::~bar", "foo", "~bar"},
+      {"std::vector<int>::push_back", "std::vector<int>", "push_back"},
+      {"operator<<(Cls, Cls)::Subclass::function",
+       "operator<<(Cls, Cls)::Subclass", "function"},
+      {"std::vector<Class, std::allocator<Class>>"
+       "::_M_emplace_back_aux<Class const&>",
+       "std::vector<Class, std::allocator<Class>>",
+       "_M_emplace_back_aux<Class const&>"}};
+  // Every valid name must split into the expected context and basename.
+  llvm::StringRef context, basename;
+  for (const auto &test : test_cases) {
+    EXPECT_TRUE(CPlusPlusLanguage::ExtractContextAndIdentifier(
+        test.input.c_str(), context, basename)) << test.input;
+    EXPECT_EQ(test.context, context.str()) << test.input;
+    EXPECT_EQ(test.basename, basename.str()) << test.input;
+  }
+  // A keyword, a numeric literal and an empty string must all be rejected.
+  EXPECT_FALSE(CPlusPlusLanguage::ExtractContextAndIdentifier("void", context,
+                                                              basename));
+  EXPECT_FALSE(
+      CPlusPlusLanguage::ExtractContextAndIdentifier("321", context, basename));
+  EXPECT_FALSE(
+      CPlusPlusLanguage::ExtractContextAndIdentifier("", context, basename));
+}
Index: source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h
===================================================================
--- /dev/null
+++ source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h
@@ -0,0 +1,173 @@
+//===-- CPlusPlusNameParser.h -----------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef liblldb_CPlusPlusNameParser_h_
+#define liblldb_CPlusPlusNameParser_h_
+
+// C Includes
+// C++ Includes
+
+// Other libraries and framework includes
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+// Project includes
+#include "lldb/Utility/ConstString.h"
+#include "lldb/lldb-private.h"
+
+namespace lldb_private {
+
+// Helps to validate and obtain various parts of C++ definitions.
+class CPlusPlusNameParser {
+public:
+  CPlusPlusNameParser(llvm::StringRef text) : m_text(text) { ExtractTokens(); }
+
+  struct ParsedName {
+    llvm::StringRef basename;
+    llvm::StringRef context;
+  };
+
+  struct ParsedFunction {
+    ParsedName name;
+    llvm::StringRef arguments;
+    llvm::StringRef qualifiers;
+  };
+
+  // Treats given text as a function definition and parses it.
+  // The definition may or may not start with a return type; both forms are
+  // accepted.
+  // Examples:
+  //    main(int, char const*)
+  //    T fun(int, bool)
+  //    std::vector<int>::push_back(int)
+  //    int& map<int, pair<short, int>>::operator[](short) const
+  //    int (*get_function(const char *))()
+  llvm::Optional<ParsedFunction> ParseAsFunctionDefinition();
+
+  // Treats given text as a potentially nested name of C++ entity (function,
+  // class, field) and parses it.
+  // Examples:
+  //    main
+  //    fun
+  //    std::vector<int>::push_back
+  //    map<int, pair<short, int>>::operator[]
+  //    func<C>(int, C&)::nested_class::method
+  llvm::Optional<ParsedName> ParseAsFullName();
+
+private:
+  // A C++ definition to parse.
+  llvm::StringRef m_text;
+  // Tokens extracted from m_text.
+  llvm::SmallVector<clang::Token, 30> m_tokens;
+  // Index of the next token to look at from m_tokens.
+  int m_next_token_index = 0;
+
+  // Half-open range [m_begin_index, m_end_index) of indexes into m_tokens.
+  struct Range {
+    int m_begin_index = 0;
+    int m_end_index = 0;
+
+    Range() {}
+
+    Range(int begin, int end) : m_begin_index(begin), m_end_index(end) {}
+
+    int size() const { return m_end_index - m_begin_index; }
+
+    bool empty() const { return size() == 0; }
+  };
+
+  struct ParsedNameRanges {
+    Range basename_range;
+    Range context_range;
+  };
+
+  // Bookmark automatically restores parsing position (m_next_token_index)
+  // when destructed unless it's manually removed with Remove().
+  class Bookmark {
+  public:
+    Bookmark(int &position)
+        : m_position(position), m_position_value(position) {}
+    Bookmark(const Bookmark &) = delete;
+    Bookmark(Bookmark &&b)
+        : m_position(b.m_position), m_position_value(b.m_position_value),
+          m_restore(b.m_restore) {
+      b.Remove();
+    }
+
+    void Remove() { m_restore = false; }
+    int GetSavedPosition() const { return m_position_value; }
+    ~Bookmark() {
+      if (m_restore) {
+        m_position = m_position_value;
+      }
+    }
+
+  private:
+    int &m_position;
+    int m_position_value;
+    bool m_restore = true;
+  };
+
+  bool HasMoreTokens();
+  void Advance();
+  void TakeBack();
+  bool ConsumeToken(clang::tok::TokenKind kind);
+  template <typename... Ts> bool ConsumeToken(Ts... kinds);
+  Bookmark SetBookmark();
+  int GetCurrentPosition();
+  clang::Token &Peek();
+  bool ConsumeBrackets(clang::tok::TokenKind left, clang::tok::TokenKind right);
+
+  llvm::Optional<ParsedFunction> ParseFunctionImpl(bool expect_return_type);
+
+  // Parses functions returning function pointers 'string (*f(int x))(float y)'
+  llvm::Optional<ParsedFunction> ParseFuncPtr(bool expect_return_type);
+
+  // Consumes function arguments enclosed within '(' ... ')'
+  bool ConsumeArguments();
+
+  // Consumes template arguments enclosed within '<' ... '>'
+  bool ConsumeTemplateArgs();
+
+  // Consumes '(anonymous namespace)'
+  bool ConsumeAnonymousNamespace();
+
+  // Consumes operator declaration like 'operator *' or 'operator delete []'
+  bool ConsumeOperator();
+
+  // Skips 'const' and 'volatile'
+  void SkipTypeQualifiers();
+
+  // Skips 'const', 'volatile', '&', '&&' at the end of the function.
+  void SkipFunctionQualifiers();
+
+  // Consumes built-in types like 'int' or 'unsigned long long int'
+  bool ConsumeBuiltinType();
+
+  // Skips things like 'const * const &' if present (never fails)
+  void SkipPtrsAndRefs();
+
+  // Consumes things like 'const * const &'
+  bool ConsumePtrsAndRefs();
+
+  // Consumes full type name like 'Namespace::Class<int>::Method()::InnerClass'
+  bool ConsumeTypename();
+
+  llvm::Optional<ParsedNameRanges> ParseFullNameImpl();
+  llvm::StringRef GetTextForRange(const Range &range);
+
+  // Populate m_tokens by calling clang lexer on m_text.
+  void ExtractTokens();
+};
+
+} // namespace lldb_private
+
+#endif // liblldb_CPlusPlusNameParser_h_
Index: source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp
===================================================================
--- /dev/null
+++ source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp
@@ -0,0 +1,615 @@
+//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPlusPlusNameParser.h"
+
+#include "clang/Basic/IdentifierTable.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Threading.h"
+
+using namespace lldb;
+using namespace lldb_private;
+using llvm::Optional;
+using llvm::None;
+using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
+using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
+namespace tok = clang::tok;
+
+llvm::Optional<ParsedFunction>
+CPlusPlusNameParser::ParseAsFunctionDefinition() {
+  m_next_token_index = 0;
+  llvm::Optional<ParsedFunction> result(None);
+
+  // Try to parse the name as function without a return type specified
+  // e.g. main(int, char*[])
+  {
+    Bookmark start_position = SetBookmark();
+    result = ParseFunctionImpl(false);
+    if (result && !HasMoreTokens())
+      return result;
+  }
+
+  // Try to parse the name as function with function pointer return type
+  // e.g. void (*get_func(const char*))()
+  result = ParseFuncPtr(true);
+  if (result)
+    return result;
+
+  // Finally try to parse the name as a function with non-function return type
+  // e.g. int main(int, char*[])
+  result = ParseFunctionImpl(true);
+  return result;
+}
+
+llvm::Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
+  m_next_token_index = 0;
+  llvm::Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
+  if (!name_ranges)
+    return None;
+  ParsedName result;
+  result.basename = GetTextForRange(name_ranges.getValue().basename_range);
+  result.context = GetTextForRange(name_ranges.getValue().context_range);
+  return result;
+}
+
+bool CPlusPlusNameParser::HasMoreTokens() {
+  return m_next_token_index < static_cast<int>(m_tokens.size());
+}
+
+void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
+
+void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
+
+bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
+  if (!HasMoreTokens())
+    return false;
+
+  if (!Peek().is(kind))
+    return false;
+
+  Advance();
+  return true;
+}
+
+template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
+  if (!HasMoreTokens())
+    return false;
+
+  if (!Peek().isOneOf(kinds...))
+    return false;
+
+  Advance();
+  return true;
+}
+
+CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
+  return Bookmark(m_next_token_index);
+}
+
+int CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
+
+clang::Token &CPlusPlusNameParser::Peek() {
+  assert(HasMoreTokens());
+  return m_tokens[m_next_token_index];
+}
+
+llvm::Optional<ParsedFunction>
+CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
+  Bookmark start_position = SetBookmark();
+  if (expect_return_type) {
+    // Consume return type if it's expected.
+    if (!ConsumeTypename())
+      return None;
+  }
+
+  auto maybe_name = ParseFullNameImpl();
+  if (!maybe_name) {
+    return None;
+  }
+
+  int argument_start = GetCurrentPosition();
+  if (!ConsumeArguments()) {
+    return None;
+  }
+
+  int qualifiers_start = GetCurrentPosition();
+  SkipFunctionQualifiers();
+  int end_position = GetCurrentPosition();
+
+  ParsedFunction result;
+  result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
+  result.name.context = GetTextForRange(maybe_name.getValue().context_range);
+  result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
+  result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
+  start_position.Remove();
+  return result;
+}
+
+llvm::Optional<ParsedFunction>
+CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
+  Bookmark start_position = SetBookmark();
+  if (expect_return_type) {
+    // Consume return type.
+    if (!ConsumeTypename())
+      return None;
+  }
+
+  if (!ConsumeToken(tok::l_paren))
+    return None;
+  if (!ConsumePtrsAndRefs())
+    return None;
+
+  {
+    Bookmark before_inner_function_pos = SetBookmark();
+    auto maybe_inner_function_name = ParseFunctionImpl(false);
+    if (maybe_inner_function_name)
+      if (ConsumeToken(tok::r_paren))
+        if (ConsumeArguments()) {
+          SkipFunctionQualifiers();
+          start_position.Remove();
+          before_inner_function_pos.Remove();
+          return maybe_inner_function_name;
+        }
+  }
+
+  auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
+  if (maybe_inner_function_ptr_name)
+    if (ConsumeToken(tok::r_paren))
+      if (ConsumeArguments()) {
+        SkipFunctionQualifiers();
+        start_position.Remove();
+        return maybe_inner_function_ptr_name;
+      }
+  return None;
+}
+
+bool CPlusPlusNameParser::ConsumeArguments() {
+  return ConsumeBrackets(tok::l_paren, tok::r_paren);
+}
+
+// Consumes template arguments enclosed within '<' ... '>'. Returns false and
+// leaves the parsing position unchanged if no balanced list starts here.
+bool CPlusPlusNameParser::ConsumeTemplateArgs() {
+  Bookmark start_position = SetBookmark();
+  if (!ConsumeToken(tok::less))
+    return false;
+
+  // Consuming template arguments is a bit trickier than consuming function
+  // arguments, because '<' '>' brackets are not always trivially balanced.
+  // In some rare cases tokens '<' and '>' can appear inside template arguments
+  // as arithmetic or shift operators, not as template brackets.
+  // Examples: std::enable_if<(10u)<(64), bool>
+  //           f<A<operator<(X,Y)::Subclass>>
+  // Luckily, the compiler makes sure that really ambiguous uses of '>' are
+  // enclosed within '()' brackets.
+  int template_counter = 1;
+  bool can_open_template = false;
+  while (HasMoreTokens() && template_counter > 0) {
+    tok::TokenKind kind = Peek().getKind();
+    switch (kind) {
+    case tok::greatergreater:
+      template_counter -= 2;
+      can_open_template = false;
+      Advance();
+      break;
+    case tok::greater:
+      --template_counter;
+      can_open_template = false;
+      Advance();
+      break;
+    case tok::less:
+      // '<' is an attempt to open a subtemplate;
+      // check if parser is at the point where it's actually possible,
+      // otherwise it's just a part of an expression like 'sizeof(T)<(10)'.
+      // No need to do the same for '>' because compiler actually makes sure
+      // that '>' is always surrounded by brackets to avoid ambiguity.
+      if (can_open_template)
+        ++template_counter;
+      can_open_template = false;
+      Advance();
+      break;
+    case tok::kw_operator: // C++ operator overloading.
+      if (!ConsumeOperator())
+        return false;
+      can_open_template = true;
+      break;
+    case tok::raw_identifier:
+      can_open_template = true;
+      Advance();
+      break;
+    case tok::l_square:
+      if (!ConsumeBrackets(tok::l_square, tok::r_square))
+        return false;
+      can_open_template = false;
+      break;
+    case tok::l_paren:
+      if (!ConsumeArguments())
+        return false;
+      can_open_template = false;
+      break;
+    default:
+      can_open_template = false;
+      Advance();
+      break;
+    }
+  }
+
+  // A stray '>>' can drive the counter negative; any imbalance is a failure.
+  if (template_counter != 0)
+    return false;
+  start_position.Remove();
+  return true;
+}
+
+bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
+  Bookmark start_position = SetBookmark();
+  if (!ConsumeToken(tok::l_paren)) {
+    return false;
+  }
+  static ConstString g_anonymous("anonymous");
+  if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
+      Peek().getRawIdentifier() == g_anonymous.GetStringRef()) {
+    Advance();
+  } else {
+    return false;
+  }
+
+  if (!ConsumeToken(tok::kw_namespace)) {
+    return false;
+  }
+
+  if (!ConsumeToken(tok::r_paren)) {
+    return false;
+  }
+  start_position.Remove();
+  return true;
+}
+
+bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
+                                          tok::TokenKind right) {
+  Bookmark start_position = SetBookmark();
+  if (!HasMoreTokens() || Peek().getKind() != left)
+    return false;
+  Advance();
+
+  int counter = 1;
+  while (HasMoreTokens() && counter > 0) {
+    tok::TokenKind kind = Peek().getKind();
+    if (kind == right)
+      --counter;
+    else if (kind == left)
+      ++counter;
+    Advance();
+  }
+
+  assert(counter >= 0);
+  if (counter > 0) {
+    return false;
+  }
+  start_position.Remove();
+  return true;
+}
+
+bool CPlusPlusNameParser::ConsumeOperator() {
+  Bookmark start_position = SetBookmark();
+  if (!ConsumeToken(tok::kw_operator))
+    return false;
+
+  if (!HasMoreTokens()) {
+    return false;
+  }
+
+  const auto &token = Peek();
+  switch (token.getKind()) {
+  case tok::kw_new:
+  case tok::kw_delete:
+    // This is 'new' or 'delete' operators.
+    Advance();
+    // Check for array new/delete.
+    if (HasMoreTokens() && Peek().is(tok::l_square)) {
+      // Consume the '[' and ']'.
+      if (!ConsumeBrackets(tok::l_square, tok::r_square))
+        return false;
+    }
+    break;
+
+#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly)  \
+  case tok::Token:                                                             \
+    Advance();                                                                 \
+    break;
+#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
+#include "clang/Basic/OperatorKinds.def"
+#undef OVERLOADED_OPERATOR
+#undef OVERLOADED_OPERATOR_MULTI
+
+  case tok::l_paren:
+    // Call operator consume '(' ... ')'.
+    if (ConsumeBrackets(tok::l_paren, tok::r_paren))
+      break;
+    return false;
+
+  case tok::l_square:
+    // This is a [] operator.
+    // Consume the '[' and ']'.
+    if (ConsumeBrackets(tok::l_square, tok::r_square))
+      break;
+    return false;
+
+  default:
+    // This might be a cast operator.
+    if (ConsumeTypename())
+      break;
+    return false;
+  }
+  start_position.Remove();
+  return true;
+}
+
+void CPlusPlusNameParser::SkipTypeQualifiers() {
+  while (ConsumeToken(tok::kw_const, tok::kw_volatile))
+    ;
+}
+
+void CPlusPlusNameParser::SkipFunctionQualifiers() {
+  while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
+    ;
+}
+
+bool CPlusPlusNameParser::ConsumeBuiltinType() {
+  bool result = false;
+  bool continue_parsing = true;
+  // Built-in types can be made of a few keywords
+  // like 'unsigned long long int'. This function
+  // consumes all built-in type keywords without
+  // checking if they make sense like 'unsigned char void'.
+  while (continue_parsing && HasMoreTokens()) {
+    switch (Peek().getKind()) {
+    case tok::kw_short:
+    case tok::kw_long:
+    case tok::kw___int64:
+    case tok::kw___int128:
+    case tok::kw_signed:
+    case tok::kw_unsigned:
+    case tok::kw_void:
+    case tok::kw_char:
+    case tok::kw_int:
+    case tok::kw_half:
+    case tok::kw_float:
+    case tok::kw_double:
+    case tok::kw___float128:
+    case tok::kw_wchar_t:
+    case tok::kw_bool:
+    case tok::kw_char16_t:
+    case tok::kw_char32_t:
+      result = true;
+      Advance();
+      break;
+    default:
+      continue_parsing = false;
+      break;
+    }
+  }
+  return result;
+}
+
+void CPlusPlusNameParser::SkipPtrsAndRefs() {
+  // Ignoring result.
+  ConsumePtrsAndRefs();
+}
+
+bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
+  bool found = false;
+  SkipTypeQualifiers();
+  while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
+                      tok::kw_volatile)) {
+    found = true;
+    SkipTypeQualifiers();
+  }
+  return found;
+}
+
+bool CPlusPlusNameParser::ConsumeTypename() {
+  Bookmark start_position = SetBookmark();
+  SkipTypeQualifiers();
+  if (!ConsumeBuiltinType()) {
+    if (!ParseFullNameImpl())
+      return false;
+  }
+  SkipPtrsAndRefs();
+  start_position.Remove();
+  return true;
+}
+
+llvm::Optional<CPlusPlusNameParser::ParsedNameRanges>
+CPlusPlusNameParser::ParseFullNameImpl() {
+  // Name parsing state machine.
+  enum class State {
+    Beginning,       // start of the name
+    AfterTwoColons,  // right after ::
+    AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
+    AfterTemplate,   // right after template brackets (<something>)
+    AfterOperator,   // right after name of C++ operator
+  };
+
+  Bookmark start_position = SetBookmark();
+  State state = State::Beginning;
+  bool continue_parsing = true;
+  int last_coloncolon_position = -1;
+
+  while (continue_parsing && HasMoreTokens()) {
+    const auto &token = Peek();
+    switch (token.getKind()) {
+    case tok::raw_identifier: // Just a name.
+      if (state != State::Beginning && state != State::AfterTwoColons) {
+        continue_parsing = false;
+        break;
+      }
+      Advance();
+      state = State::AfterIdentifier;
+      break;
+    case tok::l_paren: {
+      if (state == State::Beginning || state == State::AfterTwoColons) {
+        // (anonymous namespace)
+        if (ConsumeAnonymousNamespace()) {
+          state = State::AfterIdentifier;
+          break;
+        }
+      }
+
+      // Type declared inside a function 'func()::Type'
+      if (state != State::AfterIdentifier && state != State::AfterTemplate &&
+          state != State::AfterOperator) {
+        continue_parsing = false;
+        break;
+      }
+      Bookmark l_paren_position = SetBookmark();
+      // Consume the '(' ... ') [const]'.
+      if (!ConsumeArguments()) {
+        continue_parsing = false;
+        break;
+      }
+      SkipFunctionQualifiers();
+
+      // Consume '::'
+      int coloncolon_position = GetCurrentPosition();
+      if (!ConsumeToken(tok::coloncolon)) {
+        continue_parsing = false;
+        break;
+      }
+      l_paren_position.Remove();
+      last_coloncolon_position = coloncolon_position;
+      state = State::AfterTwoColons;
+      break;
+    }
+    case tok::coloncolon: // Type nesting delimiter.
+      if (state != State::Beginning && state != State::AfterIdentifier &&
+          state != State::AfterTemplate) {
+        continue_parsing = false;
+        break;
+      }
+      last_coloncolon_position = GetCurrentPosition();
+      Advance();
+      state = State::AfterTwoColons;
+      break;
+    case tok::less: // Template brackets.
+      if (state != State::AfterIdentifier && state != State::AfterOperator) {
+        continue_parsing = false;
+        break;
+      }
+      if (!ConsumeTemplateArgs()) {
+        continue_parsing = false;
+        break;
+      }
+      state = State::AfterTemplate;
+      break;
+    case tok::kw_operator: // C++ operator overloading.
+      if (state != State::Beginning && state != State::AfterTwoColons) {
+        continue_parsing = false;
+        break;
+      }
+      if (!ConsumeOperator()) {
+        continue_parsing = false;
+        break;
+      }
+      state = State::AfterOperator;
+      break;
+    case tok::tilde: // Destructor.
+      if (state != State::Beginning && state != State::AfterTwoColons) {
+        continue_parsing = false;
+        break;
+      }
+      Advance();
+      if (ConsumeToken(tok::raw_identifier)) {
+        state = State::AfterIdentifier;
+      } else {
+        TakeBack();
+        continue_parsing = false;
+      }
+      break;
+    default:
+      continue_parsing = false;
+      break;
+    }
+  }
+
+  if (state == State::AfterIdentifier || state == State::AfterOperator ||
+      state == State::AfterTemplate) {
+    ParsedNameRanges result;
+    if (last_coloncolon_position != -1) {
+      result.context_range =
+          Range(start_position.GetSavedPosition(), last_coloncolon_position);
+      result.basename_range =
+          Range(last_coloncolon_position + 1, GetCurrentPosition());
+    } else {
+      result.basename_range =
+          Range(start_position.GetSavedPosition(), GetCurrentPosition());
+    }
+    start_position.Remove();
+    return result;
+  } else {
+    return None;
+  }
+}
+
+llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
+  if (range.empty())
+    return llvm::StringRef();
+  assert(range.m_begin_index < range.m_end_index);
+  assert(range.m_begin_index < (int)m_tokens.size());
+  assert(range.m_end_index <= (int)m_tokens.size());
+  clang::Token &first_token = m_tokens[range.m_begin_index];
+  clang::Token &last_token = m_tokens[range.m_end_index - 1];
+  clang::SourceLocation start_loc = first_token.getLocation();
+  clang::SourceLocation end_loc = last_token.getLocation();
+  unsigned start_pos = start_loc.getRawEncoding();
+  unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
+  return m_text.take_front(end_pos).drop_front(start_pos);
+}
+
+static const clang::LangOptions &GetLangOptions() {
+  static clang::LangOptions g_options;
+  static llvm::once_flag g_once_flag;
+  llvm::call_once(g_once_flag, []() {
+    g_options.LineComment = true;
+    g_options.C99 = true;
+    g_options.C11 = true;
+    g_options.CPlusPlus = true;
+    g_options.CPlusPlus11 = true;
+    g_options.CPlusPlus14 = true;
+    g_options.CPlusPlus1z = true;
+  });
+  return g_options;
+}
+
+static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
+  static llvm::StringMap<tok::TokenKind> g_map{
+#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
+#include "clang/Basic/TokenKinds.def"
+#undef KEYWORD
+  };
+  return g_map;
+}
+
+void CPlusPlusNameParser::ExtractTokens() {
+  clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
+                     m_text.data(), m_text.data() + m_text.size());
+  const auto &kw_map = GetKeywordsMap();
+  clang::Token token;
+  for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
+       lexer.LexFromRawLexer(token)) {
+    if (token.is(clang::tok::raw_identifier)) {
+      auto it = kw_map.find(token.getRawIdentifier());
+      if (it != kw_map.end()) {
+        token.setKind(it->getValue());
+      }
+    }
+
+    m_tokens.push_back(token);
+  }
+}
Index: source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
===================================================================
--- source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
+++ source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
@@ -29,35 +29,24 @@
 public:
   class MethodName {
   public:
-    enum Type {
-      eTypeInvalid,
-      eTypeUnknownMethod,
-      eTypeClassMethod,
-      eTypeInstanceMethod
-    };
-
     MethodName()
         : m_full(), m_basename(), m_context(), m_arguments(), m_qualifiers(),
-          m_type(eTypeInvalid), m_parsed(false), m_parse_error(false) {}
+          m_parsed(false), m_parse_error(false) {}
 
     MethodName(const ConstString &s)
         : m_full(s), m_basename(), m_context(), m_arguments(), m_qualifiers(),
-          m_type(eTypeInvalid), m_parsed(false), m_parse_error(false) {}
+          m_parsed(false), m_parse_error(false) {}
 
     void Clear();
 
     bool IsValid() {
       if (!m_parsed)
         Parse();
       if (m_parse_error)
         return false;
-      if (m_type == eTypeInvalid)
-        return false;
       return (bool)m_full;
     }
 
-    Type GetType() const { return m_type; }
-
     const ConstString &GetFullName() const { return m_full; }
 
     std::string GetScopeQualifiedName();
@@ -72,15 +61,15 @@
 
   protected:
     void Parse();
+    bool TrySimplifiedParse();
 
     ConstString m_full; // Full name:
                         // "lldb::SBTarget::GetBreakpointAtIndex(unsigned int)
                         // const"
     llvm::StringRef m_basename;   // Basename:     "GetBreakpointAtIndex"
     llvm::StringRef m_context;    // Decl context: "lldb::SBTarget"
     llvm::StringRef m_arguments;  // Arguments:    "(unsigned int)"
     llvm::StringRef m_qualifiers; // Qualifiers:   "const"
-    Type m_type;
     bool m_parsed;
     bool m_parse_error;
   };
@@ -121,7 +110,7 @@
   // If the name is a lone C identifier (e.g. C) or a qualified C identifier
   // (e.g. A::B::C) it will return true,
   // and identifier will be the identifier (C and C respectively) and the
-  // context will be "" and "A::B::" respectively.
+  // context will be "" and "A::B" respectively.
   // If the name fails the heuristic matching for a qualified or unqualified
   // C/C++ identifier, then it will return false
   // and identifier and context will be unchanged.
Index: source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
===================================================================
--- source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
+++ source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
@@ -21,7 +21,6 @@
 
 // Other libraries and framework includes
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Threading.h"
 
 // Project includes
 #include "lldb/Core/PluginManager.h"
@@ -36,6 +35,7 @@
 #include "lldb/Utility/RegularExpression.h"
 
 #include "BlockPointer.h"
+#include "CPlusPlusNameParser.h"
 #include "CxxStringTypes.h"
 #include "LibCxx.h"
 #include "LibCxxAtomic.h"
@@ -85,15 +85,14 @@
   m_context = llvm::StringRef();
   m_arguments = llvm::StringRef();
   m_qualifiers = llvm::StringRef();
-  m_type = eTypeInvalid;
   m_parsed = false;
   m_parse_error = false;
 }
 
-bool ReverseFindMatchingChars(const llvm::StringRef &s,
-                              const llvm::StringRef &left_right_chars,
-                              size_t &left_pos, size_t &right_pos,
-                              size_t pos = llvm::StringRef::npos) {
+static bool ReverseFindMatchingChars(const llvm::StringRef &s,
+                                     const llvm::StringRef &left_right_chars,
+                                     size_t &left_pos, size_t &right_pos,
+                                     size_t pos = llvm::StringRef::npos) {
   assert(left_right_chars.size() == 2);
   left_pos = llvm::StringRef::npos;
   const char left_char = left_right_chars[0];
@@ -119,10 +118,9 @@
   return false;
 }
 
-static bool IsValidBasename(const llvm::StringRef &basename) {
-  // Check that the basename matches with the following regular expression or is
-  // an operator name:
-  // "^~?([A-Za-z_][A-Za-z_0-9]*)(<.*>)?$"
+static bool IsTrivialBasename(const llvm::StringRef &basename) {
+  // Check that the basename matches with the following regular expression
+  // "^~?([A-Za-z_][A-Za-z_0-9]*)$"
   // We are using a hand written implementation because it is significantly more
   // efficient then
   // using the general purpose regular expression library.
@@ -149,100 +147,69 @@
   if (idx == basename.size())
     return true;
 
-  // Check for basename with template arguments
-  // TODO: Improve the quality of the validation with validating the template
-  // arguments
-  if (basename[idx] == '<' && basename.back() == '>')
-    return true;
+  return false;
+}
 
-  // Check if the basename is a vaild C++ operator name
-  if (!basename.startswith("operator"))
-    return false;
+bool CPlusPlusLanguage::MethodName::TrySimplifiedParse() {
+  // This method tries to parse simple method definitions
+  // which are presumably most common in user programs.
+  // Definitions that can be parsed by this function have neither return types
+  // nor templates in the name, e.g.:
+  // A::B::C::fun(std::vector<T> &) const
+  size_t arg_start, arg_end;
+  llvm::StringRef full(m_full.GetCString());
+  llvm::StringRef parens("()", 2);
+  if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) {
+    m_arguments = full.substr(arg_start, arg_end - arg_start + 1);
+    if (arg_end + 1 < full.size())
+      m_qualifiers = full.substr(arg_end + 1).ltrim();
+
+    if (arg_start == 0)
+      return false;
+    size_t basename_end = arg_start;
+    size_t context_start = 0;
+    size_t context_end = full.rfind(':', basename_end);
+    if (context_end == llvm::StringRef::npos)
+      m_basename = full.substr(0, basename_end);
+    else {
+      if (context_start < context_end)
+        m_context = full.substr(context_start, context_end - 1 - context_start);
+      const size_t basename_begin = context_end + 1;
+      m_basename = full.substr(basename_begin, basename_end - basename_begin);
+    }
 
-  static RegularExpression g_operator_regex(
-      llvm::StringRef("^(operator)( "
-                      "?)([A-Za-z_][A-Za-z_0-9]*|\\(\\)|"
-                      "\\[\\]|[\\^<>=!\\/"
-                      "*+-]+)(<.*>)?(\\[\\])?$"));
-  std::string basename_str(basename.str());
-  return g_operator_regex.Execute(basename_str, nullptr);
+    if (IsTrivialBasename(m_basename)) {
+      return true;
+    } else {
+      // The basename is not a trivial identifier, so this simplified parse
+      // does not apply: clear everything out and report failure to the caller.
+      m_context = llvm::StringRef();
+      m_basename = llvm::StringRef();
+      m_arguments = llvm::StringRef();
+      m_qualifiers = llvm::StringRef();
+      return false;
+    }
+  }
+  return false;
 }
 
 void CPlusPlusLanguage::MethodName::Parse() {
   if (!m_parsed && m_full) {
-    //        ConstString mangled;
-    //        m_full.GetMangledCounterpart(mangled);
-    //        printf ("\n   parsing = '%s'\n", m_full.GetCString());
-    //        if (mangled)
-    //            printf ("   mangled = '%s'\n", mangled.GetCString());
-    m_parse_error = false;
-    m_parsed = true;
-    llvm::StringRef full(m_full.GetCString());
-
-    size_t arg_start, arg_end;
-    llvm::StringRef parens("()", 2);
-    if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) {
-      m_arguments = full.substr(arg_start, arg_end - arg_start + 1);
-      if (arg_end + 1 < full.size())
-        m_qualifiers = full.substr(arg_end + 1);
-      if (arg_start > 0) {
-        size_t basename_end = arg_start;
-        size_t context_start = 0;
-        size_t context_end = llvm::StringRef::npos;
-        if (basename_end > 0 && full[basename_end - 1] == '>') {
-          // TODO: handle template junk...
-          // Templated function
-          size_t template_start, template_end;
-          llvm::StringRef lt_gt("<>", 2);
-          if (ReverseFindMatchingChars(full, lt_gt, template_start,
-                                       template_end, basename_end)) {
-            // Check for templated functions that include return type like:
-            // 'void foo<Int>()'
-            context_start = full.rfind(' ', template_start);
-            if (context_start == llvm::StringRef::npos)
-              context_start = 0;
-            else
-              ++context_start;
-
-            context_end = full.rfind(':', template_start);
-            if (context_end == llvm::StringRef::npos ||
-                context_end < context_start)
-              context_end = context_start;
-          } else {
-            context_end = full.rfind(':', basename_end);
-          }
-        } else if (context_end == llvm::StringRef::npos) {
-          context_end = full.rfind(':', basename_end);
-        }
-
-        if (context_end == llvm::StringRef::npos)
-          m_basename = full.substr(0, basename_end);
-        else {
-          if (context_start < context_end)
-            m_context =
-                full.substr(context_start, context_end - 1 - context_start);
-          const size_t basename_begin = context_end + 1;
-          m_basename =
-              full.substr(basename_begin, basename_end - basename_begin);
-        }
-        m_type = eTypeUnknownMethod;
+    if (TrySimplifiedParse()) {
+      m_parse_error = false;
+    } else {
+      CPlusPlusNameParser parser(m_full.GetStringRef());
+      if (auto function = parser.ParseAsFunctionDefinition()) {
+        m_basename = function.getValue().name.basename;
+        m_context = function.getValue().name.context;
+        m_arguments = function.getValue().arguments;
+        m_qualifiers = function.getValue().qualifiers;
+        m_parse_error = false;
       } else {
         m_parse_error = true;
-        return;
-      }
-
-      if (!IsValidBasename(m_basename)) {
-        // The C++ basename doesn't match our regular expressions so this can't
-        // be a valid C++ method, clear everything out and indicate an error
-        m_context = llvm::StringRef();
-        m_basename = llvm::StringRef();
-        m_arguments = llvm::StringRef();
-        m_qualifiers = llvm::StringRef();
-        m_parse_error = true;
       }
-    } else {
-      m_parse_error = true;
     }
+    m_parsed = true;
   }
 }
 
@@ -273,14 +240,13 @@
 std::string CPlusPlusLanguage::MethodName::GetScopeQualifiedName() {
   if (!m_parsed)
     Parse();
-  if (m_basename.empty() || m_context.empty())
-    return std::string();
+  if (m_context.empty())
+    return m_basename;
 
   std::string res;
   res += m_context;
   res += "::";
   res += m_basename;
-
   return res;
 }
 
@@ -296,13 +262,10 @@
 
 bool CPlusPlusLanguage::ExtractContextAndIdentifier(
     const char *name, llvm::StringRef &context, llvm::StringRef &identifier) {
-  static RegularExpression g_basename_regex(llvm::StringRef(
-      "^(([A-Za-z_][A-Za-z_0-9]*::)*)(~?[A-Za-z_~][A-Za-z_0-9]*)$"));
-  RegularExpression::Match match(4);
-  if (g_basename_regex.Execute(llvm::StringRef::withNullAsEmpty(name),
-                               &match)) {
-    match.GetMatchAtIndex(name, 1, context);
-    match.GetMatchAtIndex(name, 3, identifier);
+  CPlusPlusNameParser parser(name);
+  if (auto full_name = parser.ParseAsFullName()) {
+    identifier = full_name.getValue().basename;
+    context = full_name.getValue().context;
     return true;
   }
   return false;
Index: source/Plugins/Language/CPlusPlus/CMakeLists.txt
===================================================================
--- source/Plugins/Language/CPlusPlus/CMakeLists.txt
+++ source/Plugins/Language/CPlusPlus/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_lldb_library(lldbPluginCPlusPlusLanguage PLUGIN
   BlockPointer.cpp
   CPlusPlusLanguage.cpp
+  CPlusPlusNameParser.cpp
   CxxStringTypes.cpp
   LibCxx.cpp
   LibCxxAtomic.cpp
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to