ziqingluo-90 updated this revision to Diff 495325.
ziqingluo-90 retitled this revision from "[-Wunsafe-buffer-usage][WIP] Fix-Its 
transforming `&DRE[any]` to `DRE.data() + any`" to 
"[-Wunsafe-buffer-usage][WIP] Fix-Its transforming `&DRE[any]` to `(DRE.data() 
+ any)`".
ziqingluo-90 added a comment.

Let `fixUPCAddressofArraySubscriptWithSpan` return `std::nullopt` instead of an 
empty list when we should give up on the fix-it.

Add a few test cases for some corner cases.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143128/new/

https://reviews.llvm.org/D143128

Files:
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
  clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
  clang/lib/Analysis/UnsafeBufferUsage.cpp
  
clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp

Index: clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
===================================================================
--- /dev/null
+++ clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-addressof-arraysubscript.cpp
@@ -0,0 +1,74 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s
+
+int f(unsigned long, void *);
+
+[[clang::unsafe_buffer_usage]]
+int unsafe_f(unsigned long, void *);
+
+void address_to_integer(int x) {
+  int * p = new int[10];
+  unsigned long n = (unsigned long) &p[5];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + 5)"
+  unsigned long m = (unsigned long) &p[x];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + x)"
+}
+
+void call_argument(int x) {
+  int * p = new int[10];
+
+  f((unsigned long) &p[5], &p[x]);
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:21-[[@LINE-1]]:26}:"(p.data() + 5)"
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:28-[[@LINE-2]]:33}:"(p.data() + x)"
+}
+
+void ignore_unsafe_calls(int x) {
+  // Cannot fix `&p[x]` for now as it is an argument of an unsafe
+  // call. So no fix for variable `p`.
+  int * p = new int[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+  unsafe_f((unsigned long) &p[5],
+	   // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+	   &p[x]);
+
+  int * q = new int[10];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:12}:"std::span<int> q"
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:13-[[@LINE-2]]:13}:"{"
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:24}:", 10}"
+  unsafe_f((unsigned long) &q[5],
+	   // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:28-[[@LINE-1]]:33}:"(q.data() + 5)"
+	   (void*)0);
+}
+
+void odd_subscript_form() {
+  int * p = new int[10];
+  unsigned long n = (unsigned long) &5[p];
+  // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:37-[[@LINE-1]]:42}:"(p.data() + 5)"
+}
+
+// To test multiple function declarations, each of which carries
+// different incomplete informations:
+[[clang::unsafe_buffer_usage]]
+void unsafe_g(void*);
+
+void unsafe_g(void*);
+
+void multiple_unsafe_fundecls() {
+  int * p = new int[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+  unsafe_g(&p[5]);
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+}
+
+void unsafe_h(void*);
+
+[[clang::unsafe_buffer_usage]]
+void unsafe_h(void*);
+
+void unsafe_h(void* p) { ((char*)p)[10]; }
+
+void multiple_unsafe_fundecls2() {
+  int * p = new int[10];
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+  unsafe_h(&p[5]);
+  // CHECK-NOT: fix-it:"{{.*}}":{[[@LINE-1]]
+}
Index: clang/lib/Analysis/UnsafeBufferUsage.cpp
===================================================================
--- clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -110,6 +110,15 @@
   bool Matches;
 };
 
+// Because we're dealing with raw pointers, let's define what we mean by that.
+static auto hasPointerType() {
+    return hasType(hasCanonicalType(pointerType()));
+}
+
+static auto hasArrayType() {
+    return hasType(hasCanonicalType(arrayType()));
+}
+
 AST_MATCHER_P(Stmt, forEveryDescendant, internal::Matcher<Stmt>, innerMatcher) {
   const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
   
@@ -129,6 +138,29 @@
                           castSubExpr(innerMatcher));
   // FIXME: add assignmentTo context...
 }
+
+// Returns a matcher that matches any expression `e` such that `InnerMatcher`
+// matches `e` and `e` is in an Unspecified Pointer Context (UPC).
+static internal::Matcher<Stmt>
+isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
+  // A UPC can be
+  // 1. an argument of a function call (except the callee has [[unsafe_...]]
+  // attribute), or
+  // 2. the operand of a cast operation; or
+  // ...
+  auto CallArgMatcher =
+      callExpr(hasAnyArgument(allOf(
+                   hasPointerType() /* array also decays to pointer type*/,
+                   InnerMatcher)),
+               unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
+  auto CastOperandMatcher =
+      explicitCastExpr(hasCastKind(CastKind::CK_PointerToIntegral),
+                       castSubExpr(allOf(hasPointerType(), InnerMatcher)));
+
+ return stmt(anyOf(CallArgMatcher, CastOperandMatcher));
+  // FIXME: any more cases? (UPC excludes the RHS of an assignment.  For now we
+  // don't have to check that.)
+}
 } // namespace clang::ast_matchers
 
 namespace {
@@ -143,15 +175,6 @@
 class Strategy;
 } // namespace
 
-// Because we're dealing with raw pointers, let's define what we mean by that.
-static auto hasPointerType() {
-    return hasType(hasCanonicalType(pointerType()));
-}
-
-static auto hasArrayType() {
-    return hasType(hasCanonicalType(arrayType()));
-}
-
 namespace {
 /// Gadget is an individual operation in the code that may be of interest to
 /// this analysis. Each (non-abstract) subclass corresponds to a specific
@@ -449,6 +472,50 @@
     return {};
   }
 };
+
+// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
+// Context (see `isInUnspecifiedPointerContext`).
+// Note here `[]` is the built-in subscript operator.
+class UPCAddressofArraySubscriptGadget : public FixableGadget {
+private:
+  static constexpr const char *const UPCAddressofArraySubscriptTag =
+      "AddressofArraySubscriptUnderUPC";
+  const UnaryOperator *Node; // the `&DRE[any]` node
+
+public:
+  UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result)
+      : FixableGadget(Kind::ULCArraySubscript),
+        Node(Result.Nodes.getNodeAs<UnaryOperator>(
+            UPCAddressofArraySubscriptTag)) {
+    assert(Node != nullptr && "Expecting a non-null matching result");
+  }
+
+  static bool classof(const Gadget *G) {
+    return G->getKind() == Kind::UPCAddressofArraySubscript;
+  }
+
+  static Matcher matcher() {
+    return expr(isInUnspecifiedPointerContext(expr(ignoringImpCasts(
+        unaryOperator(hasOperatorName("&"),
+                      hasUnaryOperand(arraySubscriptExpr(
+                          hasBase(ignoringParenImpCasts(declRefExpr())))))
+            .bind(UPCAddressofArraySubscriptTag)))));
+  }
+
+  virtual std::optional<FixItList> getFixits(const Strategy &) const override;
+
+  virtual const Stmt *getBaseStmt() const override { return Node; }
+
+  virtual DeclUseList getClaimedVarUseSites() const override {
+    if (const auto *ArraySubst =
+            dyn_cast<ArraySubscriptExpr>(Node->getSubExpr()))
+      if (const auto *DRE =
+              dyn_cast<DeclRefExpr>(ArraySubst->getBase()->IgnoreImpCasts())) {
+        return {DRE};
+      }
+    return {};
+  }
+};
 } // namespace
 
 namespace {
@@ -753,6 +820,28 @@
   return std::nullopt;
 }
 
+static std::optional<FixItList> // forward declaration
+fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
+
+std::optional<FixItList>
+UPCAddressofArraySubscriptGadget::getFixits(const Strategy &S) const {
+  auto DREs = getClaimedVarUseSites();
+
+  if (DREs.size() == 1)
+    if (const auto *VD = dyn_cast<VarDecl>(DREs.front()->getDecl())) {
+      switch (S.lookup(VD)) {
+      case Strategy::Kind::Span:
+        return fixUPCAddressofArraySubscriptWithSpan(Node);
+      case Strategy::Kind::Wontfix:
+      case Strategy::Kind::Iterator:
+      case Strategy::Kind::Array:
+      case Strategy::Kind::Vector:
+        llvm_unreachable("unsupported strategies for FixableGadgets");
+      }
+    }
+  return std::nullopt; // something went wrong, no fix-it
+}
+
 // Return the text representation of the given `APInt Val`:
 static std::string getAPIntText(APInt Val) {
   SmallVector<char> Txt;
@@ -786,6 +875,32 @@
       LangOpts);
 }
 
+// Generates fix-its replacing an expression of the form `&DRE[e]` with
+// `(DRE.data() + e)`:
+static std::optional<FixItList>
+fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {
+  if (const auto *ArraySub = dyn_cast<ArraySubscriptExpr>(Node->getSubExpr()))
+    if (const auto *DRE =
+            dyn_cast<DeclRefExpr>(ArraySub->getBase()->IgnoreImpCasts())) {
+      // FIXME: this `getASTContext` call is costy, we should pass the
+      // ASTContext in:
+      const ASTContext &Ctx = DRE->getDecl()->getASTContext();
+      const Expr *Idx = ArraySub->getIdx();
+      const SourceManager &SM = Ctx.getSourceManager();
+      const LangOptions &LangOpts = Ctx.getLangOpts();
+      SmallString<32> StrBuffer;
+
+      StrBuffer.append("(");
+      StrBuffer.append(getExprText(DRE, SM, LangOpts));
+      StrBuffer.append(".data() + ");
+      StrBuffer.append(getExprText(Idx, SM, LangOpts));
+      StrBuffer.append(")");
+      return FixItList{FixItHint::CreateReplacement(Node->getSourceRange(),
+                                           StrBuffer.str())};
+    }
+  return std::nullopt; // something went wrong. no fix-it
+}
+
 // For a non-null initializer `Init` of `T *` type, this function returns
 // `FixItHint`s producing a list initializer `{Init,  S}` as a part of a fix-it
 // to output stream.
Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
===================================================================
--- clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -30,7 +30,8 @@
 WARNING_GADGET(ArraySubscript)
 WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
-FIXABLE_GADGET(ULCArraySubscript)
+FIXABLE_GADGET(ULCArraySubscript)          // `DRE[any]` in an Unspecified Lvalue Context
+FIXABLE_GADGET(UPCAddressofArraySubscript) // '&DRE[any]' in an Unspecified Pointer Context
 
 #undef FIXABLE_GADGET
 #undef WARNING_GADGET
Index: clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
===================================================================
--- clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
+++ clang/include/clang/Analysis/Analyses/UnsafeBufferUsage.h
@@ -40,7 +40,7 @@
 
   /// Returns the text indicating that the user needs to provide input there:
   virtual std::string
-  getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") {
+  getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") const {
     std::string s = std::string("<# ");
     s += HintTextToUser;
     s += " #>";
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to