kastiglione created this revision.
kastiglione added reviewers: JDevlieghere, jingham, mib, aprantl.
Herald added a subscriber: mgorny.
Herald added a project: All.
kastiglione requested review of this revision.
Herald added a project: LLDB.
Herald added a subscriber: lldb-commits.

Use `std::regex` as the backing implementation of `RegularExpression`.

This provides common and expected regex features, from basic features like
`\w` and `\d` character classes, to more advanced features such as lookahead.

This change was previously gated on the minimum GCC version, because some
versions of GCC had incomplete support for `std::regex`. According to the docs,
the GCC version that LLVM has required for the past couple of years should be
sufficient. At this time GCC >= 7.1 is required.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D132307

Files:
  lldb/include/lldb/Utility/RegularExpression.h
  lldb/source/Utility/CMakeLists.txt
  lldb/source/Utility/RegularExpression.cpp
  lldb/test/API/commands/breakpoint/set/func-regex/TestBreakpointRegexError.py
  lldb/test/API/functionalities/breakpoint/source_regexp/TestSourceRegexBreakpoints.py
  lldb/test/API/functionalities/breakpoint/source_regexp/a.c
  lldb/test/API/functionalities/breakpoint/source_regexp/main.c
  lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multiset/TestDataFormatterGenericMultiSet.py
  lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/set/TestDataFormatterGenericSet.py

Index: lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/set/TestDataFormatterGenericSet.py
===================================================================
--- lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/set/TestDataFormatterGenericSet.py
+++ lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/set/TestDataFormatterGenericSet.py
@@ -130,7 +130,7 @@
         """Test that the data formatters work on ref and ptr."""
         self.build()
         (self.target, process, _, bkpt) = lldbutil.run_to_source_breakpoint(
-            self, "Stop here to check by ref and ptr.",
+            self, "Stop here to check by ref and ptr",
             lldb.SBFileSpec("main.cpp", False))
         # The reference should print just like the value:
         self.check("ref", 7)
Index: lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multiset/TestDataFormatterGenericMultiSet.py
===================================================================
--- lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multiset/TestDataFormatterGenericMultiSet.py
+++ lldb/test/API/functionalities/data-formatter/data-formatter-stl/generic/multiset/TestDataFormatterGenericMultiSet.py
@@ -129,7 +129,7 @@
         """Test that the data formatters work on ref and ptr."""
         self.build(dictionary={stdlib_type: "1"})
         (self.target, process, _, bkpt) = lldbutil.run_to_source_breakpoint(
-            self, "Stop here to check by ref and ptr.",
+            self, "Stop here to check by ref and ptr",
             lldb.SBFileSpec("main.cpp", False))
         # The reference should print just like the value:
         self.check("ref", 7)
Index: lldb/test/API/functionalities/breakpoint/source_regexp/main.c
===================================================================
--- lldb/test/API/functionalities/breakpoint/source_regexp/main.c
+++ lldb/test/API/functionalities/breakpoint/source_regexp/main.c
@@ -4,7 +4,7 @@
 int
 main_func(int input)
 {
-  return printf("Set B breakpoint here: %d.\n", input);
+  return printf("Set B0 breakpoint here: %d.\n", input);
 }
 
 int
Index: lldb/test/API/functionalities/breakpoint/source_regexp/a.c
===================================================================
--- lldb/test/API/functionalities/breakpoint/source_regexp/a.c
+++ lldb/test/API/functionalities/breakpoint/source_regexp/a.c
@@ -5,12 +5,12 @@
 static int
 main_func(int input)
 {
-  return printf("Set B breakpoint here: %d", input);
+  return printf("Set B2 breakpoint here: %d", input);
 }
 
 int
 a_func(int input)
 {
-  input += 1; // Set A breakpoint here;
+  input += 1; // Set A1 breakpoint here;
   return main_func(input);
 }
Index: lldb/test/API/functionalities/breakpoint/source_regexp/TestSourceRegexBreakpoints.py
===================================================================
--- lldb/test/API/functionalities/breakpoint/source_regexp/TestSourceRegexBreakpoints.py
+++ lldb/test/API/functionalities/breakpoint/source_regexp/TestSourceRegexBreakpoints.py
@@ -34,7 +34,7 @@
         func_names = lldb.SBStringList()
         func_names.AppendString("a_func")
 
-        source_regex = "Set . breakpoint here"
+        source_regex = r"Set \w\d? breakpoint here"
         main_break = target.BreakpointCreateBySourceRegex(
             source_regex, lldb.SBFileSpecList(), target_files, func_names)
         num_locations = main_break.GetNumLocations()
@@ -50,7 +50,7 @@
             address.IsValid(),
             "Got a valid address from the location.")
 
-        a_func_line = line_number("a.c", "Set A breakpoint here")
+        a_func_line = line_number("a.c", "Set A1 breakpoint here")
         line_entry = address.GetLineEntry()
         self.assertTrue(line_entry.IsValid(), "Got a valid line entry.")
         self.assertEquals(line_entry.line, a_func_line,
@@ -66,7 +66,7 @@
         # First look just in main:
         target_files = lldb.SBFileSpecList()
         target_files.Append(lldb.SBFileSpec("main.c"))
-        source_regex = "Set . breakpoint here"
+        source_regex = r"Set \w\d? breakpoint here"
         main_break = target.BreakpointCreateBySourceRegex(
             source_regex, lldb.SBFileSpecList(), target_files, lldb.SBStringList())
 
Index: lldb/test/API/commands/breakpoint/set/func-regex/TestBreakpointRegexError.py
===================================================================
--- lldb/test/API/commands/breakpoint/set/func-regex/TestBreakpointRegexError.py
+++ lldb/test/API/commands/breakpoint/set/func-regex/TestBreakpointRegexError.py
@@ -8,17 +8,17 @@
     @no_debug_info_test
     def test_error(self):
         self.expect("breakpoint set --func-regex (", error=True,
-                    substrs=["error: Function name regular expression could " +
-                             "not be compiled: parentheses not balanced"])
+                    substrs=["error: Function name regular expression could "
+                             "not be compiled: The expression contained mismatched ( and )."])
 
         # Point out if looks like the user provided a globbing expression.
         self.expect("breakpoint set --func-regex *a", error=True,
-                    substrs=["error: Function name regular expression could " +
-                             "not be compiled: repetition-operator operand invalid",
+                    substrs=["error: Function name regular expression could "
+                             "not be compiled: One of *?+{ was not preceded by a valid regular expression.",
                              "warning: Function name regex does not accept glob patterns."])
         self.expect("breakpoint set --func-regex ?a", error=True,
-                    substrs=["error: Function name regular expression could " +
-                             "not be compiled: repetition-operator operand invalid",
+                    substrs=["error: Function name regular expression could "
+                             "not be compiled: One of *?+{ was not preceded by a valid regular expression.",
                              "warning: Function name regex does not accept glob patterns."])
         # Make sure that warning is only shown for invalid regular expressions
         # that look like a globbing expression (i.e., they have a leading * or ?).
Index: lldb/source/Utility/RegularExpression.cpp
===================================================================
--- lldb/source/Utility/RegularExpression.cpp
+++ lldb/source/Utility/RegularExpression.cpp
@@ -7,15 +7,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/Utility/RegularExpression.h"
+#include "llvm/ADT/StringRef.h"
 
 #include <string>
 
 using namespace lldb_private;
 
 RegularExpression::RegularExpression(llvm::StringRef str)
-    : m_regex_text(std::string(str)),
-      // m_regex does not reference str anymore after it is constructed.
-      m_regex(llvm::Regex(str)) {}
+    // m_regex does not reference str anymore after it is constructed.
+    : m_regex_text(std::string(str)) {
+  try {
+    m_regex = m_regex_text;
+  } catch (const std::regex_error &e) {
+    m_regex_error = e;
+  }
+}
 
 RegularExpression::RegularExpression(const RegularExpression &rhs)
     : RegularExpression(rhs.GetText()) {}
@@ -25,17 +31,33 @@
     llvm::SmallVectorImpl<llvm::StringRef> *matches) const {
   if (!IsValid())
     return false;
-  return m_regex.match(str, matches);
+
+  if (!matches)
+    return std::regex_search(str.begin(), str.end(), m_regex);
+
+  std::cmatch result;
+  if (!std::regex_search(str.begin(), str.end(), result, m_regex))
+    return false;
+
+  matches->clear();
+  for (const auto &sub : result) {
+    llvm::StringRef match(sub.first, sub.length());
+    matches->push_back(match);
+  }
+  return true;
 }
 
-bool RegularExpression::IsValid() const { return m_regex.isValid(); }
+bool RegularExpression::IsValid() const {
+  return !m_regex_text.empty() && !m_regex_error;
+}
 
 llvm::StringRef RegularExpression::GetText() const { return m_regex_text; }
 
 llvm::Error RegularExpression::GetError() const {
-  std::string error;
-  if (!m_regex.isValid(error))
+  if (!IsValid()) {
+    std::string error = m_regex_error ? m_regex_error->what() : "empty regex";
     return llvm::make_error<llvm::StringError>(error,
                                                llvm::inconvertibleErrorCode());
+  }
   return llvm::Error::success();
 }
Index: lldb/source/Utility/CMakeLists.txt
===================================================================
--- lldb/source/Utility/CMakeLists.txt
+++ lldb/source/Utility/CMakeLists.txt
@@ -23,6 +23,12 @@
     list(APPEND LLDB_SYSTEM_LIBS atomic)
 endif()
 
+set_source_files_properties(
+  RegularExpression.cpp
+  PROPERTIES COMPILE_OPTIONS
+  "-fcxx-exceptions"
+)
+
 add_lldb_library(lldbUtility
   ArchSpec.cpp
   Args.cpp
Index: lldb/include/lldb/Utility/RegularExpression.h
===================================================================
--- lldb/include/lldb/Utility/RegularExpression.h
+++ lldb/include/lldb/Utility/RegularExpression.h
@@ -13,6 +13,8 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Regex.h"
 
+#include <optional>
+#include <regex>
 namespace lldb_private {
 
 class RegularExpression {
@@ -86,7 +88,8 @@
   /// A copy of the original regular expression text.
   std::string m_regex_text;
   /// The compiled regular expression.
-  mutable llvm::Regex m_regex;
+  std::regex m_regex;
+  std::optional<std::regex_error> m_regex_error;
 };
 
 } // namespace lldb_private
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to