https://github.com/steakhal created 
https://github.com/llvm/llvm-project/pull/93408

This change enables more accurate modeling of the write effects of `fread`. In 
particular, instead of invalidating the whole buffer, on a best-effort basis, 
we try to invalidate only the elements of the buffer that are actually accessed. 
This preserves the previous values of the unaffected slots of the buffer. As a 
result, we can diagnose more uninitialized buffer uses, for example.

Currently, this refined invalidation only triggers for `fread` if and only if 
the `count` parameter and the buffer pointer's index component are concrete or 
perfectly-constrained symbols.
Additionally, if the `fread` would read more than 64 elements, the whole buffer 
is invalidated as before. This is to have safeguards against performance issues.

Refer to the comments of the assertions in the following example to see the 
changes in the diagnostics:

```c++
void demo() {
  FILE *fp = fopen("/home/test", "rb+");
  if (!fp) return;
  int buffer[10]; // uninitialized
  int read_items = fread(buffer+1, sizeof(int), 5, fp);
  if (5 == read_items) {
    int v1 = buffer[1]; // Unknown value but not garbage.
    clang_analyzer_isTainted(v1); // expected-warning {{YES}} <-- Would be "NO" without this patch.
    clang_analyzer_dump(v1); // expected-warning {{conj_}} <-- Not a "derived" symbol, so it's directly invalidated now.
    int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before.
    (void)(v1 + v0);
  } else {
    // If 'fread' had an error.
    int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before.
    (void)v0;
  }
  fclose(fp);
}
```

[CPP-3247](https://sonarsource.atlassian.net/browse/CPP-3247)

Patch by Marco Borgeaud (marco-antognini-sonarsource)

>From f9e841ddaa865d529c806b2d115d5ddbc7109243 Mon Sep 17 00:00:00 2001
From: Balazs Benics <benicsbal...@gmail.com>
Date: Sun, 26 May 2024 11:40:01 +0200
Subject: [PATCH] [analyzer] Refine invalidation caused by `fread`

This change enables more accurate modeling of the write effects of `fread`.
In particular, instead of invalidating the whole buffer, on a best-effort
basis, we try to invalidate only the elements of the buffer that are actually
accessed. This preserves the previous values of the unaffected slots of the
buffer. As a result, we can diagnose more uninitialized buffer uses, for example.

Currently, this refined invalidation only triggers for `fread` if and
only if the `count` parameter and the buffer pointer's index component
are concrete or perfectly-constrained symbols.
Additionally, if the `fread` would read more than 64 elements, the whole
buffer is invalidated as before. This is to have safeguards against
performance issues.

Refer to the comments of the assertions in the following example to see
the changes in the diagnostics:

```c++
void demo() {
  FILE *fp = fopen("/home/test", "rb+");
  if (!fp) return;
  int buffer[10]; // uninitialized
  int read_items = fread(buffer+1, sizeof(int), 5, fp);
  if (5 == read_items) {
    int v1 = buffer[1]; // Unknown value but not garbage.
    clang_analyzer_isTainted(v1); // expected-warning {{YES}} <-- Would be "NO" without this patch.
    clang_analyzer_dump(v1); // expected-warning {{conj_}} <-- Not a "derived" symbol, so it's directly invalidated now.
    int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before.
    (void)(v1 + v0);
  } else {
    // If 'fread' had an error.
    int v0 = buffer[0]; // expected-warning {{Assigned value is garbage or undefined}} <-- Had no report here before.
    (void)v0;
  }
  fclose(fp);
}
```

[CPP-3247](https://sonarsource.atlassian.net/browse/CPP-3247)

Patch by Marco Borgeaud (marco-antognini-sonarsource)
---
 .../StaticAnalyzer/Checkers/StreamChecker.cpp |  88 ++++-
 clang/test/Analysis/fread.cpp                 | 328 ++++++++++++++++++
 2 files changed, 405 insertions(+), 11 deletions(-)
 create mode 100644 clang/test/Analysis/fread.cpp

diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
index d4e020f7a72a0..7b42c4f72b322 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp
@@ -717,18 +717,71 @@ const ExplodedNode *StreamChecker::getAcquisitionSite(const ExplodedNode *N,
   return nullptr;
 }
 
+/// Invalidate only the requested elements instead of the whole buffer.
+/// This is basically a refinement of the more generic 'escapeArgs' or
+/// the plain old 'invalidateRegions'.
+/// This only works if the \p StartIndex and \p Count are concrete or
+/// perfectly-constrained.
+static ProgramStateRef
+escapeByStartIndexAndCount(ProgramStateRef State, CheckerContext &C,
+                           const CallEvent &Call, const MemRegion *Buffer,
+                           QualType ElemType, SVal StartIndex, SVal Count) {
+  if (!llvm::isa_and_nonnull<SubRegion>(Buffer))
+    return State;
+
+  auto UnboxAsInt = [&C, &State](SVal V) -> std::optional<int64_t> {
+    auto &SVB = C.getSValBuilder();
+    if (const llvm::APSInt *Int = SVB.getKnownValue(State, V))
+      return Int->tryExtValue();
+    return std::nullopt;
+  };
+
+  auto StartIndexVal = UnboxAsInt(StartIndex);
+  auto CountVal = UnboxAsInt(Count);
+
+  // FIXME: Maybe we could make this more generic, and expose this by the
+  // 'invalidateRegions' API. After doing so, it might make sense to make this
+  // limit configurable.
+  constexpr int MaxInvalidatedElementsLimit = 64;
+  if (!StartIndexVal || !CountVal || *CountVal > MaxInvalidatedElementsLimit) {
+    return State->invalidateRegions({loc::MemRegionVal{Buffer}},
+                                    Call.getOriginExpr(), C.blockCount(),
+                                    C.getLocationContext(),
+                                    /*CausesPointerEscape=*/false);
+  }
+
+  constexpr auto DoNotInvalidateSuperRegion =
+      RegionAndSymbolInvalidationTraits::InvalidationKinds::
+          TK_DoNotInvalidateSuperRegion;
+
+  auto &RegionManager = Buffer->getMemRegionManager();
+  SmallVector<SVal> EscapingVals;
+  EscapingVals.reserve(*CountVal);
+
+  RegionAndSymbolInvalidationTraits ITraits;
+  for (auto Idx : llvm::seq(*StartIndexVal, *StartIndexVal + *CountVal)) {
+    NonLoc Index = C.getSValBuilder().makeArrayIndex(Idx);
+    const auto *Element = RegionManager.getElementRegion(
+        ElemType, Index, cast<SubRegion>(Buffer), C.getASTContext());
+    EscapingVals.push_back(loc::MemRegionVal(Element));
+    ITraits.setTrait(Element, DoNotInvalidateSuperRegion);
+  }
+  return State->invalidateRegions(EscapingVals, Call.getOriginExpr(),
+                                  C.blockCount(), C.getLocationContext(),
+                                  /*CausesPointerEscape=*/false,
+                                  /*InvalidatedSymbols=*/nullptr, &Call,
+                                  &ITraits);
+}
+
 static ProgramStateRef escapeArgs(ProgramStateRef State, CheckerContext &C,
                                   const CallEvent &Call,
                                   ArrayRef<unsigned int> EscapingArgs) {
-  const auto *CE = Call.getOriginExpr();
-
-  SmallVector<SVal> EscapingVals;
-  EscapingVals.reserve(EscapingArgs.size());
-  for (auto EscArgIdx : EscapingArgs)
-    EscapingVals.push_back(Call.getArgSVal(EscArgIdx));
-  State = State->invalidateRegions(EscapingVals, CE, C.blockCount(),
-                                   C.getLocationContext(),
-                                   /*CausesPointerEscape=*/false);
+  auto GetArgSVal = [&Call](int Idx) { return Call.getArgSVal(Idx); };
+  auto EscapingVals = to_vector(map_range(EscapingArgs, GetArgSVal));
+  State = State->invalidateRegions(EscapingVals, Call.getOriginExpr(),
+                                   C.blockCount(), C.getLocationContext(),
+                                   /*CausesPointerEscape=*/false,
+                                   /*InvalidatedSymbols=*/nullptr);
   return State;
 }
 
@@ -937,8 +990,21 @@ void StreamChecker::evalFreadFwrite(const FnDescription *Desc,
 
   // At read, invalidate the buffer in any case of error or success,
   // except if EOF was already present.
-  if (IsFread && !E.isStreamEof())
-    State = escapeArgs(State, C, Call, {0});
+  if (IsFread && !E.isStreamEof()) {
+    // Try to invalidate the individual elements.
+    if (const auto *BufferFirstElem =
+            dyn_cast_or_null<ElementRegion>(Call.getArgSVal(0).getAsRegion())) 
{
+      const MemRegion *Buffer = BufferFirstElem->getSuperRegion();
+      QualType ElemTy = BufferFirstElem->getElementType();
+      SVal FirstAccessedItem = BufferFirstElem->getIndex();
+      SVal ItemCount = Call.getArgSVal(2);
+      State = escapeByStartIndexAndCount(State, C, Call, Buffer, ElemTy,
+                                         FirstAccessedItem, ItemCount);
+    } else {
+      // Otherwise just fall back to invalidating the whole buffer.
+      State = escapeArgs(State, C, Call, {0});
+    }
+  }
 
   // Generate a transition for the success state.
   // If we know the state to be FEOF at fread, do not add a success state.
diff --git a/clang/test/Analysis/fread.cpp b/clang/test/Analysis/fread.cpp
new file mode 100644
index 0000000000000..2bf9baefe1395
--- /dev/null
+++ b/clang/test/Analysis/fread.cpp
@@ -0,0 +1,328 @@
+// RUN: %clang_analyze_cc1 -verify %s \
+// RUN:   -analyzer-checker=core,unix.Stream,alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection
+
+#define EOF (-1)
+
+extern "C" {
+typedef __typeof(sizeof(int)) size_t;
+typedef struct _FILE FILE;
+
+FILE *fopen(const char *filename, const char *mode);
+int fclose(FILE *stream);
+size_t fread(void *buffer, size_t size, size_t count, FILE *stream);
+int fgetc(FILE *stream);
+void *malloc(size_t size);
+}
+
+void clang_analyzer_dump(int);
+void clang_analyzer_isTainted(int);
+void clang_analyzer_warnIfReached();
+
+// A stream is only tracked by StreamChecker if it results from a call to 
"fopen".
+// Otherwise, there is no specific modelling of "fread".
+void untracked_stream(FILE *fp) {
+  char c;
+  if (1 == fread(&c, 1, 1, fp)) {
+    char p = c; // Unknown value but not garbage and not modeled by checker.
+  } else {
+    char p = c; // Possibly indeterminate value but not modeled by checker.
+  }
+}
+
+void fgetc_props_taint() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    int c = fgetc(fp); // c is tainted.
+    if (c != EOF) {
+      clang_analyzer_isTainted(c); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void fread_props_taint() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    char buffer[10];
+    int c = fread(buffer, 1, 10, fp); // c is tainted.
+    if (c != 10) {
+      // If the read failed, then the number of bytes successfully read should 
be tainted.
+      clang_analyzer_isTainted(c); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte1() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    char c;
+    if (1 == fread(&c, 1, 1, fp)) {
+      char p = c; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = c; // Possibly indeterminate value but not modeled by checker.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte2(char *buffer) {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    if (1 == fread(buffer, 1, 1, fp)) {
+      char p = buffer[0]; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = buffer[0]; // Possibly indeterminate value but not modeled by 
checker.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void read_one_byte3(char *buffer) {
+  buffer[1] = 10;
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    // buffer[1] is not mutated by fread and remains not tainted.
+    fread(buffer, 1, 1, fp);
+    char p = buffer[1];
+    clang_analyzer_isTainted(p); // expected-warning{{NO}}
+    clang_analyzer_dump(buffer[1]); // expected-warning{{derived_}} FIXME This 
should be 10.
+    fclose(fp);
+  }
+}
+
+void read_many_bytes(char *buffer) {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    if (42 == fread(buffer, 1, 42, fp)) {
+      char p = buffer[0]; // Unknown value but not garbage.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    } else {
+      char p = buffer[0]; // Possibly indeterminate value but not modeled.
+      clang_analyzer_isTainted(p); // expected-warning{{YES}}
+    }
+    fclose(fp);
+  }
+}
+
+void random_access_write1(int index) {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    long c[4];
+    bool success = 2 == fread(c + 1, sizeof(long), 2, fp);
+
+    switch (index) {
+    case 0:
+      // c[0] is not mutated by fread.
+      if (success) {
+        char p = c[0]; // expected-warning {{Assigned value is garbage or 
undefined}} We kept the first byte intact.
+      } else {
+        char p = c[0]; // expected-warning {{Assigned value is garbage or 
undefined}} We kept the first byte intact.
+      }
+      break;
+
+    case 1:
+      if (success) {
+        // Unknown value but not garbage.
+        clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+        clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+      } else {
+        // Possibly indeterminate value but not modeled.
+        clang_analyzer_isTainted(c[1]); // expected-warning {{YES}}
+        clang_analyzer_dump(c[1]); // expected-warning {{conj_}}
+      }
+      break;
+
+    case 2:
+      if (success) {
+        long p = c[2]; // Unknown value but not garbage.
+        // FIXME: Taint analysis only marks the first byte of a memory region. 
See getPointeeOf in GenericTaintChecker.cpp.
+        clang_analyzer_isTainted(c[2]); // expected-warning {{NO}}
+        clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+      } else {
+        // Possibly indeterminate value but not modeled.
+        clang_analyzer_isTainted(c[2]); // expected-warning {{NO}} // FIXME: 
See above.
+        clang_analyzer_dump(c[2]); // expected-warning {{conj_}}
+      }
+      break;
+
+    case 3:
+      // c[3] is not mutated by fread.
+      if (success) {
+        long p = c[3]; // expected-warning {{Assigned value is garbage or 
undefined}}
+      } else {
+        long p = c[3]; // expected-warning {{Assigned value is garbage or 
undefined}}
+      }
+      break;
+    }
+
+    fclose(fp);
+  }
+}
+
+void random_access_write2(bool b) {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    int buffer[10];
+    int *ptr = buffer + 2;
+    if (5 == fread(ptr - 1, sizeof(int), 5, fp)) {
+      if (b) {
+        int p = buffer[1]; // Unknown value but not garbage.
+        clang_analyzer_isTainted(p); // expected-warning {{YES}}
+        clang_analyzer_dump(p); // expected-warning {{conj_}}
+      } else {
+        int p = buffer[0]; // expected-warning {{Assigned value is garbage or 
undefined}}
+      }
+    } else {
+      int p = buffer[0]; // expected-warning {{Assigned value is garbage or 
undefined}}
+    }
+    fclose(fp);
+  }
+}
+
+void random_access_write_symbolic_count(size_t count) {
+  // Cover a case that used to crash (symbolic count).
+  if (count > 2)
+    return;
+
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    long c[4];
+    fread(c + 1, sizeof(long), count, fp);
+
+    // c[0] and c[3] are never mutated by fread, but because "count" is a 
symbolic value, the checker doesn't know that.
+    long p = c[0];
+    clang_analyzer_isTainted(p); // expected-warning {{NO}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    p = c[3];
+    clang_analyzer_isTainted(p); // expected-warning {{NO}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    p = c[1];
+    clang_analyzer_isTainted(p); // expected-warning {{YES}}
+    clang_analyzer_dump(p); // expected-warning {{derived_}}
+
+    fclose(fp);
+  }
+}
+
+void dynamic_random_access_write(int startIndex) {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    long buffer[10];
+    // Cannot reason about index.
+    size_t res = fread(buffer + startIndex, sizeof(long), 5, fp);
+    if (5 == res) {
+      long p = buffer[startIndex];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    } else if (res == 4) {
+      long p = buffer[startIndex];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[startIndex + 1];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[startIndex + 2];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[startIndex + 3];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[startIndex + 4];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[startIndex + 5];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[0];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    } else {
+      long p = buffer[startIndex];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+      p = buffer[0];
+      clang_analyzer_isTainted(p); // expected-warning {{NO}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    }
+    fclose(fp);
+  }
+}
+
+struct S {
+  int a;
+  long b;
+};
+
+void comopund_write1() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    S s; // s.a is not touched by fread.
+    if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+      long p = s.b;
+      clang_analyzer_isTainted(p); // expected-warning {{YES}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    } else {
+      long p = s.b;
+      clang_analyzer_isTainted(p); // expected-warning {{YES}}
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    }
+    fclose(fp);
+  }
+}
+
+void comopund_write2() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    S s; // s.a is not touched by fread.
+    if (1 == fread(&s.b, sizeof(s.b), 1, fp)) {
+      long p = s.a; // FIXME: This should raise an uninitialized read.
+      clang_analyzer_isTainted(p); // expected-warning {{NO}} FIXME: This 
should be YES.
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    } else {
+      long p = s.a; // FIXME: This should raise an uninitialized read.
+      clang_analyzer_isTainted(p); // expected-warning {{NO}} FIXME: This 
should be YES.
+      clang_analyzer_dump(p); // expected-warning {{conj_}}
+    }
+    fclose(fp);
+  }
+}
+
+void var_write() {
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    int a, b; // 'a' is not touched by fread.
+    if (1 == fread(&b, sizeof(b), 1, fp)) {
+      long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+    } else {
+      long p = a; // expected-warning{{Assigned value is garbage or undefined}}
+    }
+    fclose(fp);
+  }
+}
+
+// When reading a lot of data, invalidating all elements is too time-consuming.
+// Instead, the knowledge of the whole array is lost.
+#define MaxInvalidatedElementRegion 64 // See StreamChecker::evalFreadFwrite 
in StreamChecker.cpp.
+#define PastMaxComplexity MaxInvalidatedElementRegion + 1
+void test_large_read() {
+  int buffer[PastMaxComplexity + 1];
+  buffer[PastMaxComplexity] = 42;
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    if (buffer[PastMaxComplexity] != 42) {
+      clang_analyzer_warnIfReached(); // Unreachable.
+    }
+    if (1 == fread(buffer, sizeof(int), PastMaxComplexity, fp)) {
+      if (buffer[PastMaxComplexity] != 42) {
+        clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}}
+      }
+    }
+    fclose(fp);
+  }
+}
+
+void test_small_read() {
+  int buffer[10];
+  buffer[5] = 42;
+  if (FILE *fp = fopen("/home/test", "rb+")) {
+    clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}
+    if (1 == fread(buffer, sizeof(int), 5, fp)) {
+      clang_analyzer_dump(buffer[5]); // expected-warning{{42 S32b}}
+    }
+    fclose(fp);
+  }
+}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to