ziqingluo-90 updated this revision to Diff 491581.
ziqingluo-90 added a comment.

Refactored the fix-it generation code to stop using the pretty-printer.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139737/new/

https://reviews.llvm.org/D139737

Files:
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
  clang/lib/Analysis/UnsafeBufferUsage.cpp
  clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-local-var-span.cpp

Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-local-var-span.cpp
===================================================================
--- /dev/null
+++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-local-var-span.cpp
@@ -0,0 +1,96 @@
+// RUN: cp %s %t.cpp
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fixit %t.cpp
+// RUN: grep -v CHECK %t.cpp | FileCheck %s
+typedef int * Int_ptr_t;
+typedef int Int_t;
+
+void local_array_subscript_simple() {
+  int tmp;
+// CHECK: std::span<int> p {new int[10], 10};
+// CHECK: std::span<const int> q {new int[10], 10};
+// CHECK: tmp = p[5];
+// CHECK: tmp = q[5];
+  int *p = new int[10];
+  const int *q = new int[10];
+  tmp = p[5];
+  tmp = q[5];
+
+// CHECK: std::span<int> x {new int[10], 10};
+// CHECK: std::span<int> y {new int, 1};
+// CHECK: std::span<Int_t> z {new int[10], 10};
+// CHECK: std::span<Int_t> w {new Int_t[10], 10};
+
+  Int_ptr_t x = new int[10];
+  Int_ptr_t y = new int;
+  Int_t * z = new int[10];
+  Int_t * w = new Int_t[10];
+
+  // CHECK: tmp = x[5];
+  tmp = x[5];
+  // CHECK: tmp = y[5];
+  tmp = y[5]; // y[5] will crash after being span
+  // CHECK: tmp = z[5];
+  tmp = z[5];
+  // CHECK: tmp = w[5];
+  tmp = w[5];
+}
+
+void local_array_subscript_auto() {
+  int tmp;
+// CHECK: std::span<int> p {new int[10], 10};
+// CHECK: tmp = p[5];
+  auto p = new int[10];
+  tmp = p[5];
+}
+
+void local_array_subscript_variable_extent() {
+  int n = 10;
+  int tmp;
+
+  // CHECK: std::span<int> p {new int[n], n};
+  // CHECK: std::span<int> q {new int[n++], <# placeholder #>};
+  // CHECK: tmp = p[5];
+  // CHECK: tmp = q[5];
+  int *p = new int[n];
+  // If the extent expression does not have a constant value, we cannot fill the extent for users...
+  int *q = new int[n++];
+  tmp = p[5];
+  tmp = q[5];
+}
+
+
+void local_ptr_to_array() {
+  int tmp;
+  int n = 10;
+  int a[10];
+  int b[n];  // If the extent expression does not have a constant value, we cannot fill the extent for users...
+  // CHECK: std::span<int> p {a, 10};
+  // CHECK: std::span<int> q {b, <# placeholder #>};
+  // CHECK: tmp = p[5];
+  // CHECK: tmp = q[5];
+  int *p = a;
+  int *q = b;
+  tmp = p[5];
+  tmp = q[5];
+}
+
+void local_ptr_addrof_init() {
+  int var;
+// CHECK: std::span<int> q {&var, 1};
+// CHECK: var = q[5];
+  int * q = &var;
+  // This expression involves unsafe buffer accesses, which will crash
+  // at runtime after applying the fix-it,
+  var = q[5];
+}
+
+void decl_without_init() {
+  int tmp;
+  // CHECK: std::span<int> p;
+  int * p;
+  // CHECK: std::span<int> q;
+  Int_ptr_t q;
+
+  tmp = p[5];
+  tmp = q[5];
+}
Index: clang/lib/Analysis/UnsafeBufferUsage.cpp
===================================================================
--- clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -9,6 +9,7 @@
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Lex/Lexer.h"
 #include "llvm/ADT/SmallVector.h"
 #include <memory>
 #include <optional>
@@ -115,6 +116,21 @@
   MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All);  
   return Visitor.findMatch(DynTypedNode::create(Node));
 }
+
+AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
+  return innerMatcher.matches(*Node.getSubExpr(), Finder, Builder);
+}
+
+// Returns a matcher that matches any expression 'e' such that `innerMatcher`
+// matches 'e' and 'e' is in an Unspecified Lvalue Context.
+static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {
+  auto isLvalueToRvalueCast = [](internal::Matcher<Expr> M) {
+    return implicitCastExpr(hasCastKind(CastKind::CK_LValueToRValue),
+                            castSubExpr(M));
+  };
+  //FIXME: add assignmentTo context...
+  return isLvalueToRvalueCast(innerMatcher);
+}
 } // namespace clang::ast_matchers
 
 namespace {
@@ -282,7 +298,7 @@
 /// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
 /// it doesn't have any bounds checks for the array.
 class ArraySubscriptGadget : public WarningGadget {
-  static constexpr const char *const ArraySubscrTag = "arraySubscr";
+  static constexpr const char *const ArraySubscrTag = "ArraySubscript";
   const ArraySubscriptExpr *ASE;
 
 public:
@@ -366,6 +382,51 @@
   // FIXME: pointer adding zero should be fine
   //FIXME: this gadge will need a fix-it
 };
+
+class Strategy;
+
+// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
+// Context (see `isInUnspecifiedLvalueContext`).
+// Note here `[]` is the built-in subscript operator.
+class ULCArraySubscriptGadget : public FixableGadget {
+private:
+  static constexpr const char *const ULCArraySubscriptTag = "ArraySubscriptUnderULC";
+  const ArraySubscriptExpr *Node;
+
+public:
+  ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result)
+      : FixableGadget(Kind::ULCArraySubscript),
+        Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ULCArraySubscriptTag)) {
+    assert(Node != nullptr && "Expecting a non-null matching result");
+  }
+
+  static bool classof(const Gadget *G) {
+    return G->getKind() == Kind::ULCArraySubscript;
+  }
+
+  static Matcher matcher() {
+    auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
+    auto BaseIsArrayOrPtrDRE =
+        hasBase(ignoringParenImpCasts(allOf(declRefExpr(), ArrayOrPtr)));
+    auto Target =
+        arraySubscriptExpr(BaseIsArrayOrPtrDRE,
+                           unless(hasIndex(integerLiteral(equals(0)))))
+            .bind(ULCArraySubscriptTag);
+
+    return expr(isInUnspecifiedLvalueContext(Target));
+  }
+
+  virtual Optional<FixItList> getFixits(const Strategy &S) const override;
+
+  virtual const Stmt *getBaseStmt() const override { return Node; }
+
+  virtual DeclUseList getClaimedVarUseSites() const override {
+    if (const auto *DRE = dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts())) {
+      return {DRE};
+    }
+    return {};
+  }
+};
 } // namespace
 
 namespace {
@@ -519,26 +580,30 @@
   // clang-format off
   M.addMatcher(
     stmt(forEveryDescendant(
+      eachOf(
+      // A `FixableGadget` matcher and a `WarningGadget` matcher should not disable
+      // each other (they could if they were put in the same `anyOf` group).
+      // We also should make sure no two `FixableGadget` (resp. `WarningGadget`) matchers
+      // match for the same node, so that we can group them
+      // in one `anyOf` group (for better performance via short-circuiting).
       stmt(anyOf(
-        // Add Gadget::matcher() for every gadget in the registry.
-#define GADGET(x)                                                              \
+#define FIXABLE_GADGET(x)                                                              \
         x ## Gadget::matcher().bind(#x),
 #include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
-        // In parallel, match all DeclRefExprs so that to find out
-        // whether there are any uncovered by gadgets.
-        declRefExpr(anyOf(hasPointerType(), hasArrayType()),
-                    to(varDecl())).bind("any_dre"),
         // Also match DeclStmts because we'll need them when fixing
         // their underlying VarDecls that otherwise don't have
         // any backreferences to DeclStmts.
         declStmt().bind("any_ds")
-      ))
-      // FIXME: Idiomatically there should be a forCallable(equalsNode(D))
-      // here, to make sure that the statement actually belongs to the
-      // function and not to a nested function. However, forCallable uses
-      // ParentMap which can't be used before the AST is fully constructed.
-      // The original problem doesn't sound like it needs ParentMap though,
-      // maybe there's a more direct solution?
+      )),
+      stmt(anyOf(
+        // Add Gadget::matcher() for every gadget in the registry.
+#define WARNING_GADGET(x)                                                              \
+        x ## Gadget::matcher().bind(#x),
+#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
+        // In parallel, match all DeclRefExprs so that to find out
+        // whether there are any uncovered by gadgets.
+        declRefExpr(anyOf(hasPointerType(), hasArrayType()), to(varDecl())).bind("any_dre")
+      )))
     )),
     &CB
   );
@@ -607,11 +672,232 @@
   return FixablesForUnsafeVars;
 }
 
+static Strategy
+getNaiveStrategy(const llvm::SmallVectorImpl<const VarDecl *> &UnsafeVars) {
+  Strategy S;
+  for (const VarDecl *VD : UnsafeVars) {
+    S.set(VD, Strategy::Kind::Span);
+  }
+  return S;
+}
+
+Optional<FixItList>
+ULCArraySubscriptGadget::getFixits(const Strategy &S) const {
+  if (const auto *DRE = dyn_cast<DeclRefExpr>(Node->getBase()->IgnoreImpCasts()))
+    if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
+      switch (S.lookup(VD)) {
+      case Strategy::Kind::Span:
+        return FixItList{};
+      case Strategy::Kind::Wontfix:
+      case Strategy::Kind::Iterator:
+      case Strategy::Kind::Array:
+      case Strategy::Kind::Vector:
+        llvm_unreachable("unsupported strategies for FixableGadgets");
+      }
+    }
+  return std::nullopt;
+}
+
+// Return the text representation of the given `APInt Val`:
+static std::string getAPIntText(APInt Val) {
+  SmallVector<char> Txt;
+  Val.toString(Txt, 10, true);
+  return Txt.data();
+}
+
+// Return the SourceLocation right before the given `Loc`
+static SourceLocation prevLoc(SourceLocation Loc, const SourceManager &SM) {
+  auto [FileID, Offset] = SM.getDecomposedLoc(Loc);
+  return SM.getComposedLoc(FileID, Offset - 1);
+}
+
+// Return text representation of an `Expr`.  In case unable to get the text
+// representatiion of `E`, return `Default`.
+static StringRef getExprTextOr(const Expr *E, const SourceManager &SM,
+                               const LangOptions &LangOpts, StringRef Default) {
+  std::optional<Token> EndToken =
+      Lexer::findNextToken(prevLoc(E->getEndLoc(), SM), SM, LangOpts);
+
+  if (!EndToken)
+    return Default;
+  return Lexer::getSourceText(
+      CharSourceRange::getCharRange(E->getBeginLoc(), EndToken->getEndLoc()),
+      SM, LangOpts);
+}
+
+// Return the source location right passed the last character of `Node`.
+template <typename NodeTy>
+static std::optional<SourceLocation> getPassedLoc(const NodeTy *Node,
+                                                  const SourceManager &SM,
+                                                  const LangOptions &LangOpts) {
+  std::optional<Token> LastToken =
+      Lexer::findNextToken(prevLoc(Node->getEndLoc(), SM), SM, LangOpts);
+
+  if (!LastToken)
+    return std::nullopt;
+  return LastToken->getEndLoc();
+}
+
+// For a non-null initializer `Init` of `T *` type, this function returns
+// `FixItHint`s producing a list initializer `{Init,  S}` as a part of a fix-it
+// to output stream.
+// In many cases, this function cannot figure out the actual extent `S`.  It
+// then will use a place holder to replace `S` to ask users to fill `S` in.  The
+// initializer shall be used to initialize a variable of type `std::span<T>`.
+//
+// FIXME: Support multi-level pointers
+//
+// Parameters:
+//   `Init` a pointer to the initializer expression
+//   `Ctx` a reference to the ASTContext
+static FixItList populateInitializerFixItWithSpan(const Expr *Init,
+                                                  const ASTContext &Ctx) {
+  static const std::string DefaultExtentTxt =
+      UnsafeBufferUsageHandler::getUserFillPlaceHolder();
+  FixItList FixIts{};
+  const SourceManager &SM = Ctx.getSourceManager();
+  const LangOptions &LangOpts = Ctx.getLangOpts();
+  std::string ExtentText = DefaultExtentTxt;
+  StringRef One = "1";
+
+  // Insert `{` before `Init`:
+  FixIts.push_back(FixItHint::CreateInsertion(Init->getBeginLoc(), "{"));
+  // Try to get the data extent. Break into different cases:
+  if (auto CxxNew = dyn_cast<CXXNewExpr>(Init->IgnoreImpCasts())) {
+    // In cases `Init` is `new T[n]` and there is no explicit cast over
+    // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
+    // of `T`. So the extent is `n` unless `n` has side effects.  Similar but
+    // simpler for the case where `Init` is `new T`.
+    if (const Expr *Ext = CxxNew->getArraySize().value_or(nullptr)) {
+      if (!Ext->HasSideEffects(Ctx))
+        ExtentText =
+            getExprTextOr(Ext, SM, LangOpts, StringRef(DefaultExtentTxt));
+    } else if (!CxxNew->isArray())
+      ExtentText = One;
+  } else if (auto CArrTy = dyn_cast<ConstantArrayType>(
+                 Init->IgnoreImpCasts()->getType().getTypePtr())) {
+    // In cases `Init` is of an array type after stripping off implicit casts,
+    // the extent is the array size.  Note that if the array size is not a
+    // constant, we cannot use it as the extent.
+    ExtentText = getAPIntText(CArrTy->getSize());
+  } else {
+    // In cases `Init` is of the form `&Var` after stripping of implicit
+    // casts, where `&` is the built-in operator, the extent is 1.
+    if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Init->IgnoreImpCasts()))
+      if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
+          isa_and_present<DeclRefExpr>(AddrOfExpr->getSubExpr()))
+        ExtentText = One;
+    // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, std::addressof,...
+    // etc.
+  }
+
+  SmallString<32> StrBuffer{};
+  std::optional<SourceLocation> LocPassInit = 
+      getPassedLoc(Init, SM, LangOpts);
+
+  if (!LocPassInit)
+    return {};
+  StrBuffer.append(", ");
+  StrBuffer.append(ExtentText);
+  StrBuffer.append("}");
+  FixIts.push_back(
+      FixItHint::CreateInsertion(*LocPassInit, StrBuffer.str()));
+  return FixIts;
+}
+
+// For a `VarDecl` of the form `T  * var (= Init)?`, this
+// function generates a fix-it for the declaration, which re-declares `var` to
+// be of `span<T>` type and transforms the initializer, if present, to a span
+// constructor---`span<T> var {Init, Extent}`, where `Extent` may need the user
+// to fill in.
+//
+// FIXME: support Multi-level pointers
+//
+// Parameters:
+//   `D` a pointer the variable declaration node
+//   `Ctx` a reference to the ASTContext
+// Returns:
+//    the generated fix-it
+static FixItList fixVarDeclWithSpan(const VarDecl *D, const ASTContext &Ctx) {
+  const QualType &T = D->getType().getDesugaredType(Ctx);
+  const QualType &SpanEltT = T->getPointeeType();
+  assert(!SpanEltT.isNull() && "Trying to fix a non-pointer type variable!");
+
+  FixItList FixIts{};
+  SourceLocation
+      ReplacementLastLoc; // the inclusive end location of the replacement
+  const SourceManager &SM = Ctx.getSourceManager();
+
+  if (const Expr *Init = D->getInit()) {
+    FixItList InitFixIts = populateInitializerFixItWithSpan(Init, Ctx);
+
+    if (InitFixIts.empty())
+      return {}; // Something wrong with fixing initializer, give up
+    // The loc right before the initializer:
+    ReplacementLastLoc = prevLoc(Init->getBeginLoc(), SM);
+    FixIts.insert(FixIts.end(), std::make_move_iterator(InitFixIts.begin()),
+                  std::make_move_iterator(InitFixIts.end()));
+  } else if (auto LocPassDecl = getPassedLoc(D, SM, Ctx.getLangOpts()))
+    // The loc right before the loc right passed `D`:
+    ReplacementLastLoc = prevLoc(*LocPassDecl, SM);
+  else
+    return {}; // Something wrong, give up
+
+  // Producing fix-it for variable declaration---make `D` to be of span type:
+  SmallString<32> Replacement;
+  raw_svector_ostream OS(Replacement);
+
+  OS << "std::span<" << SpanEltT.getAsString() << "> " << D->getName();
+  FixIts.push_back(FixItHint::CreateReplacement(
+      SourceRange{D->getBeginLoc(), ReplacementLastLoc}, OS.str()));
+  return FixIts;
+}
+
+static FixItList fixVariableWithSpan(const VarDecl *VD,
+                                     const DeclUseTracker &Tracker,
+                                     const ASTContext &Ctx) {
+  const DeclStmt *DS = Tracker.lookupDecl(VD);
+  assert(DS && "Fixing non-local variables not implemented yet!");
+  assert(DS->getSingleDecl() == VD &&
+         "Fixing non-single declarations not implemented yet!");
+  // Currently DS is an unused variable but we'll need it when
+  // non-single decls are implemented, where the pointee type name
+  // and the '*' are spread around the place.
+  (void)DS;
+
+  // FIXME: handle cases where DS has multiple declarations
+  return fixVarDeclWithSpan(VD, Ctx);
+}
+
+static FixItList fixVariable(const VarDecl *VD, Strategy::Kind K,
+                             const DeclUseTracker &Tracker,
+                             const ASTContext &Ctx) {
+  switch (K) {
+  case Strategy::Kind::Span: {
+    if (VD->getType()->isPointerType())
+      return fixVariableWithSpan(VD, Tracker, Ctx);
+    return {};
+  }
+  case Strategy::Kind::Iterator:
+  case Strategy::Kind::Array:
+  case Strategy::Kind::Vector:
+    llvm_unreachable("Strategy not implemented yet!");
+  case Strategy::Kind::Wontfix:
+    llvm_unreachable("Invalid strategy!");
+  }
+}
+
 static std::map<const VarDecl *, FixItList>
-getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S) {
+getFixIts(FixableGadgetSets &FixablesForUnsafeVars, const Strategy &S, const DeclUseTracker &Tracker, const ASTContext &Ctx) {
   std::map<const VarDecl *, FixItList> FixItsForVariable;
   for (const auto &[VD, Fixables] : FixablesForUnsafeVars.byVar) {
-    // TODO fixVariable - fixit for the variable itself
+    FixItsForVariable[VD] = fixVariable(VD, S.lookup(VD), Tracker, Ctx);
+    // If we fail to produce Fix-It for the declaration we have to skip the variable entirely.
+    if (FixItsForVariable[VD].empty()) {
+      FixItsForVariable.erase(VD);
+      continue;
+    }
+
     bool ImpossibleToFix = false;
     llvm::SmallVector<FixItHint, 16> FixItsForVD;
     for (const auto &F : Fixables) {
@@ -634,15 +920,6 @@
   return FixItsForVariable;
 }
 
-static Strategy
-getNaiveStrategy(const llvm::SmallVectorImpl<const VarDecl *> &UnsafeVars) {
-  Strategy S;
-  for (const VarDecl *VD : UnsafeVars) {
-    S.set(VD, Strategy::Kind::Span);
-  }
-  return S;
-}
-
 void clang::checkUnsafeBufferUsage(const Decl *D,
                                    UnsafeBufferUsageHandler &Handler) {
   assert(D && D->getBody());
@@ -675,7 +952,7 @@
 
   Strategy NaiveStrategy = getNaiveStrategy(UnsafeVars);
   std::map<const VarDecl *, FixItList> FixItsForVariable =
-      getFixIts(FixablesForUnsafeVars, NaiveStrategy);
+      getFixIts(FixablesForUnsafeVars, NaiveStrategy, Tracker, D->getASTContext());
 
   // FIXME Detect overlapping FixIts.
 
Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
===================================================================
--- clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -29,6 +29,7 @@
 WARNING_GADGET(Decrement)
 WARNING_GADGET(ArraySubscript)
 WARNING_GADGET(PointerArithmetic)
+FIXABLE_GADGET(ULCArraySubscript)
 
 #undef FIXABLE_GADGET
 #undef WARNING_GADGET
Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
===================================================================
--- clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -37,6 +37,15 @@
   /// Invoked when a fix is suggested against a variable.
   virtual void handleFixableVariable(const VarDecl *Variable,
                                      FixItList &&List) = 0;
+
+  /// Returns the text indicating that the user needs to provide input there:
+  static std::string
+  getUserFillPlaceHolder(const StringRef &HintTextToUser = "placeholder") {
+    std::string s = std::string("<# ");
+    s += HintTextToUser;
+    s += " #>";
+    return s;
+  }
 };
 
 // This function invokes the analysis and allows the caller to react to it
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to