https://github.com/Pppp1116 created 
https://github.com/llvm/llvm-project/pull/190446

None

>From efe75bd3f7ceb9fc9ae7b82d344e120e84e9e418 Mon Sep 17 00:00:00 2001
From: Pppp1116 <[email protected]>
Date: Sat, 4 Apr 2026 08:06:21 +0100
Subject: [PATCH] Warn on small string-buffer allocations

---
 .../clang/Basic/DiagnosticSemaKinds.td        |   8 +
 clang/lib/Sema/AnalysisBasedWarnings.cpp      | 253 ++++++++++++++++++
 ...warn-fortify-cstring-allocation-tracking.c |  40 +++
 3 files changed, 301 insertions(+)
 create mode 100644 clang/test/Sema/warn-fortify-cstring-allocation-tracking.c

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index eddf9c50033e1..7a439b2137430 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -965,6 +965,14 @@ def warn_fortify_strlen_overflow: Warning<
   " but the source string has length %2 (including NUL byte)">,
   InGroup<FortifySource>;
 
+def warn_fortify_allocation_maybe_small_for_cstring : Warning<
+  "allocation of '%0' bytes may be insufficient for null-terminated string; "
+  "consider '%0 + 1'">, InGroup<FortifySource>;
+
+def warn_fortify_realloc_maybe_removes_null_space : Warning<
+  "realloc may remove space required for null terminator; expected at least "
+  "%0 + 1">, InGroup<FortifySource>;
+
 def subst_format_overflow : TextSubstitution<
   "'%0' will always overflow; destination buffer has size %1,"
   " but format string expands to at least %2">;
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp 
b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 37ed7488bb927..9629d35bbf05c 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -51,6 +51,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
@@ -58,6 +59,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TimeProfiler.h"
 #include <algorithm>
 #include <deque>
@@ -2851,6 +2853,250 @@ void 
sema::AnalysisBasedWarnings::issueWarningsForRegisteredVarDecl(
       S, AC, std::make_pair(SecondRange.begin(), SecondRange.end()));
 }
 
+namespace {
+
+static unsigned getDiagnosedBuiltinID(const FunctionDecl *FD) {
+  if (!FD)
+    return 0;
+
+  if (const auto *DABAttr = FD->getAttr<DiagnoseAsBuiltinAttr>())
+    FD = DABAttr->getFunction();
+
+  return FD ? FD->getBuiltinID(/*ConsiderWrappers=*/true) : 0;
+}
+
+static bool isCharacterPointerType(QualType Ty) {
+  const auto *PT = Ty->getAs<PointerType>();
+  return PT && PT->getPointeeType().getUnqualifiedType()->isCharType();
+}
+
+static const VarDecl *getCharacterPointerVar(const Expr *E) {
+  const auto *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts());
+  const auto *VD = DRE ? dyn_cast<VarDecl>(DRE->getDecl()) : nullptr;
+  if (!VD || !isCharacterPointerType(VD->getType()))
+    return nullptr;
+  return VD;
+}
+
+static const Expr *getDirectStrlenArgument(const Expr *E) {
+  const auto *CE = dyn_cast<CallExpr>(E->IgnoreParenCasts());
+  if (!CE || CE->getNumArgs() != 1)
+    return nullptr;
+
+  switch (getDiagnosedBuiltinID(CE->getDirectCallee())) {
+  case Builtin::BIstrlen:
+  case Builtin::BI__builtin_strlen:
+    return CE->getArg(0)->IgnoreParenCasts();
+  default:
+    return nullptr;
+  }
+}
+
+static bool isLiteralOne(const Expr *E) {
+  const auto *IL = dyn_cast<IntegerLiteral>(E->IgnoreParenCasts());
+  return IL && IL->getValue() == 1;
+}
+
+static const Expr *getDirectStrlenArgumentWithOptionalPlusOne(const Expr *E) {
+  E = E->IgnoreParenCasts();
+  if (const Expr *Arg = getDirectStrlenArgument(E))
+    return Arg;
+
+  const auto *BO = dyn_cast<BinaryOperator>(E);
+  if (!BO || BO->getOpcode() != BO_Add)
+    return nullptr;
+
+  const Expr *LHS = BO->getLHS()->IgnoreParenCasts();
+  const Expr *RHS = BO->getRHS()->IgnoreParenCasts();
+  if (isLiteralOne(LHS))
+    return getDirectStrlenArgument(RHS);
+  if (isLiteralOne(RHS))
+    return getDirectStrlenArgument(LHS);
+  return nullptr;
+}
+
+static bool areSameSimpleDeclRef(const Expr *E1, const Expr *E2) {
+  const auto *D1 = dyn_cast<DeclRefExpr>(E1->IgnoreParenCasts());
+  const auto *D2 = dyn_cast<DeclRefExpr>(E2->IgnoreParenCasts());
+  return D1 && D2 && D1->getDecl() == D2->getDecl();
+}
+
+static std::string getExprAsString(const Expr *E,
+                                   const PrintingPolicy &Policy) {
+  SmallString<64> Text;
+  llvm::raw_svector_ostream OS(Text);
+  E->printPretty(OS, nullptr, Policy);
+  return std::string(OS.str());
+}
+
+static FixItHint getPlusOneFixIt(const Expr *E, const PrintingPolicy &Policy) {
+  const Expr *SimpleExpr = E->IgnoreParenCasts();
+  if (!isa<DeclRefExpr>(SimpleExpr))
+    return {};
+
+  SourceRange Range = E->getSourceRange();
+  if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
+    return {};
+
+  SmallString<32> Replacement;
+  llvm::raw_svector_ostream OS(Replacement);
+  E->printPretty(OS, nullptr, Policy);
+  OS << " + 1";
+  return FixItHint::CreateReplacement(Range, OS.str());
+}
+
+struct TrackedCStringAllocation {
+  const Expr *SizeExpr = nullptr;
+  const CallExpr *AllocCall = nullptr;
+  bool IsRealloc = false;
+  bool Warned = false;
+};
+
+class CStringAllocationVisitor : public StmtVisitor<CStringAllocationVisitor> {
+  Sema &S;
+  llvm::DenseMap<const VarDecl *, TrackedCStringAllocation> Tracked;
+  llvm::DenseSet<const VarDecl *> KnownStringBuffers;
+
+  void VisitChildren(Stmt *Node) {
+    for (Stmt *Child : Node->children())
+      if (Child)
+        Visit(Child);
+  }
+
+  void DiagnoseTrackedAllocation(const VarDecl *VD) {
+    auto It = Tracked.find(VD);
+    if (It == Tracked.end() || It->second.Warned)
+      return;
+
+    TrackedCStringAllocation &Info = It->second;
+    std::string SizeText =
+        getExprAsString(Info.SizeExpr, S.getPrintingPolicy());
+    S.Diag(Info.AllocCall->getBeginLoc(),
+           Info.IsRealloc
+               ? diag::warn_fortify_realloc_maybe_removes_null_space
+               : diag::warn_fortify_allocation_maybe_small_for_cstring)
+        << SizeText << getPlusOneFixIt(Info.SizeExpr, S.getPrintingPolicy());
+    Info.Warned = true;
+  }
+
+  void MarkAsStringBuffer(const Expr *E) {
+    const VarDecl *VD = getCharacterPointerVar(E);
+    if (!VD)
+      return;
+
+    KnownStringBuffers.insert(VD);
+    DiagnoseTrackedAllocation(VD);
+  }
+
+  std::optional<TrackedCStringAllocation>
+  getTrackedAllocation(const Expr *E, const VarDecl *AssignedVar) const {
+    const auto *CE = dyn_cast<CallExpr>(E->IgnoreParenCasts());
+    if (!CE)
+      return std::nullopt;
+
+    switch (getDiagnosedBuiltinID(CE->getDirectCallee())) {
+    case Builtin::BImalloc:
+    case Builtin::BI__builtin_malloc: {
+      if (CE->getNumArgs() != 1)
+        return std::nullopt;
+
+      const Expr *SizeArg = CE->getArg(0)->IgnoreParenCasts();
+      if (isa<DeclRefExpr>(SizeArg))
+        return TrackedCStringAllocation{SizeArg, CE, false, false};
+      return std::nullopt;
+    }
+    case Builtin::BIrealloc:
+    case Builtin::BI__builtin_realloc: {
+      if (CE->getNumArgs() != 2 || !AssignedVar)
+        return std::nullopt;
+
+      const Expr *SizeArg = CE->getArg(1)->IgnoreParenCasts();
+      const VarDecl *ReallocVar = getCharacterPointerVar(CE->getArg(0));
+      if (ReallocVar == AssignedVar && isa<DeclRefExpr>(SizeArg))
+        return TrackedCStringAllocation{SizeArg, CE, true, false};
+      return std::nullopt;
+    }
+    default:
+      return std::nullopt;
+    }
+  }
+
+  void UpdateTrackedAllocation(const VarDecl *VD, const Expr *Init) {
+    if (!VD || !Init || !isCharacterPointerType(VD->getType()))
+      return;
+
+    if (std::optional<TrackedCStringAllocation> Info =
+            getTrackedAllocation(Init, VD)) {
+      Tracked[VD] = *Info;
+      if (KnownStringBuffers.contains(VD))
+        DiagnoseTrackedAllocation(VD);
+      return;
+    }
+
+    Tracked.erase(VD);
+  }
+
+public:
+  explicit CStringAllocationVisitor(Sema &S) : S(S) {}
+
+  void VisitStmt(Stmt *Node) { VisitChildren(Node); }
+
+  void VisitLambdaExpr(LambdaExpr *) {}
+
+  void VisitBlockExpr(BlockExpr *) {}
+
+  void VisitDeclStmt(DeclStmt *DS) {
+    for (Decl *D : DS->decls())
+      if (auto *VD = dyn_cast<VarDecl>(D))
+        UpdateTrackedAllocation(VD, VD->getInit());
+    VisitChildren(DS);
+  }
+
+  void VisitBinaryOperator(BinaryOperator *BO) {
+    if (BO->getOpcode() == BO_Assign)
+      if (const auto *VD = getCharacterPointerVar(BO->getLHS()))
+        UpdateTrackedAllocation(VD, BO->getRHS());
+    VisitChildren(BO);
+  }
+
+  void VisitCallExpr(CallExpr *CE) {
+    switch (getDiagnosedBuiltinID(CE->getDirectCallee())) {
+    case Builtin::BIstrlen:
+    case Builtin::BI__builtin_strlen:
+      if (CE->getNumArgs() == 1)
+        MarkAsStringBuffer(CE->getArg(0));
+      break;
+    case Builtin::BIstrcpy:
+    case Builtin::BI__builtin_strcpy:
+    case Builtin::BIstrcat:
+    case Builtin::BI__builtin_strcat:
+      if (CE->getNumArgs() == 2)
+        MarkAsStringBuffer(CE->getArg(0));
+      break;
+    case Builtin::BImemcpy:
+    case Builtin::BI__builtin_memcpy:
+      if (CE->getNumArgs() != 3)
+        break;
+
+      if (const Expr *StrlenArg =
+              getDirectStrlenArgumentWithOptionalPlusOne(CE->getArg(2));
+          StrlenArg && areSameSimpleDeclRef(StrlenArg, CE->getArg(1))) {
+        MarkAsStringBuffer(CE->getArg(0));
+      }
+      break;
+    default:
+      break;
+    }
+    VisitChildren(CE);
+  }
+};
+
+static void CheckCStringBufferAllocations(Sema &S, const Stmt *Body) {
+  CStringAllocationVisitor(S).Visit(const_cast<Stmt *>(Body));
+}
+
+} // namespace
+
 // An AST Visitor that calls a callback function on each callable DEFINITION
 // that is NOT in a dependent context:
 class CallableVisitor : public DynamicRecursiveASTVisitor {
@@ -3022,6 +3268,13 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
   const Stmt *Body = D->getBody();
   assert(Body);
 
+  if (!Diags.isIgnored(diag::warn_fortify_allocation_maybe_small_for_cstring,
+                       D->getBeginLoc()) ||
+      !Diags.isIgnored(diag::warn_fortify_realloc_maybe_removes_null_space,
+                       D->getBeginLoc())) {
+    CheckCStringBufferAllocations(S, Body);
+  }
+
   // Construct the analysis context with the specified CFG build options.
   AnalysisDeclContext AC(/* AnalysisDeclContextManager */ nullptr, D);
 
diff --git a/clang/test/Sema/warn-fortify-cstring-allocation-tracking.c 
b/clang/test/Sema/warn-fortify-cstring-allocation-tracking.c
new file mode 100644
index 0000000000000..be81990ace89d
--- /dev/null
+++ b/clang/test/Sema/warn-fortify-cstring-allocation-tracking.c
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -fsyntax-only -Wfortify-source -verify %s
+
+typedef __SIZE_TYPE__ size_t;
+
+void *malloc(size_t);
+void *realloc(void *, size_t);
+void *memcpy(void *, const void *, size_t);
+char *strcpy(char *, const char *);
+size_t strlen(const char *);
+
+void malloc_len_then_strcpy(const char *src, size_t len) {
+  char *s = malloc(len); // expected-warning{{allocation of 'len' bytes may be 
insufficient for null-terminated string; consider 'len + 1'}}
+  strcpy(s, src);
+}
+
+void malloc_len_non_string(char *dst, const char *src, size_t len) {
+  char *buf = malloc(len);
+  memcpy(buf, src, len);
+  memcpy(dst, buf, len);
+}
+
+void malloc_complex_size(const char *src, size_t len) {
+  char *s = malloc(len * 2);
+  strcpy(s, src);
+}
+
+void realloc_len_then_strcpy(char *s, const char *src, size_t len) {
+  s = realloc(s, len); // expected-warning{{realloc may remove space required 
for null terminator; expected at least len + 1}}
+  strcpy(s, src);
+}
+
+void realloc_ok(char *s, const char *src, size_t len) {
+  s = realloc(s, len + 1);
+  strcpy(s, src);
+}
+
+void malloc_len_then_memcpy_string(const char *src, size_t len) {
+  char *s = malloc(len); // expected-warning{{allocation of 'len' bytes may be 
insufficient for null-terminated string; consider 'len + 1'}}
+  memcpy(s, src, strlen(src) + 1);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to