dkrupp updated this revision to Diff 498795.
dkrupp added a comment.

Added documentation to the newly introduced types: TaintData, TaintBugReport.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D144269/new/

https://reviews.llvm.org/D144269

Files:
  clang/include/clang/StaticAnalyzer/Checkers/Taint.h
  clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
  clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
  clang/include/clang/StaticAnalyzer/Core/BugReporter/TaintBugType.h
  clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
  clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  clang/lib/StaticAnalyzer/Checkers/Taint.cpp
  clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
  clang/lib/StaticAnalyzer/Core/BugReporter.cpp
  clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
  
clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
  
clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
  clang/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c
  clang/test/Analysis/taint-checker-callback-order-has-definition.c
  clang/test/Analysis/taint-checker-callback-order-without-definition.c
  clang/test/Analysis/taint-diagnostic-visitor.c
  clang/test/Analysis/taint-dumps.c

Index: clang/test/Analysis/taint-dumps.c
===================================================================
--- clang/test/Analysis/taint-dumps.c
+++ clang/test/Analysis/taint-dumps.c
@@ -6,7 +6,7 @@
 int getchar(void);
 
 // CHECK: Tainted symbols:
-// CHECK-NEXT: conj_$2{{.*}} : 0
+// CHECK-NEXT: conj_$2{{.*}} : Type:0 Flow:0
 int test_taint_dumps(void) {
   int x = getchar();
   clang_analyzer_printState();
Index: clang/test/Analysis/taint-diagnostic-visitor.c
===================================================================
--- clang/test/Analysis/taint-diagnostic-visitor.c
+++ clang/test/Analysis/taint-diagnostic-visitor.c
@@ -2,8 +2,17 @@
 
 // This file is for testing enhanced diagnostics produced by the GenericTaintChecker
 
+typedef unsigned long size_t;
+struct _IO_FILE;
+typedef struct _IO_FILE FILE;
+
 int scanf(const char *restrict format, ...);
 int system(const char *command);
+char* getenv( const char* env_var );
+size_t strlen( const char* str );
+void *malloc(size_t size );
+char *fgets(char *str, int n, FILE *stream);
+FILE *stdin;
 
 void taintDiagnostic(void)
 {
@@ -34,3 +43,41 @@
   int vla[x]; // expected-warning {{Declared variable-length array (VLA) has tainted size}}
               // expected-note@-1 {{Declared variable-length array (VLA) has tainted size}}
 }
+
+//Tests if the originated note is correctly placed even if the path is
+//propagating through variables and expressions
+char* taintDiagnosticPropagation(){
+  char *pathbuf;
+  char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}}
+  if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}}
+	               // expected-note@-1 {{Taking true branch}}
+    pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
+                                                // expected-note@-1{{Untrusted data is used to specify the buffer size}}
+    return pathbuf;
+  }
+  return 0;
+}
+
+
+//Taint origin should be marked correctly even if there are multiple taint
+//sources in the function
+char* taintDiagnosticPropagation2(){
+  char *pathbuf;
+  char *user_env2=getenv("USER_ENV_VAR2");//unrelated taint source
+  char *pathlist=getenv("PATH"); // expected-note {{Taint originated here}}
+  char *user_env=getenv("USER_ENV_VAR");//unrelated taint source
+  if (pathlist){ // expected-note {{Assuming 'pathlist' is non-null}}
+	               // expected-note@-1 {{Taking true branch}}
+    pathbuf=(char*) malloc(strlen(pathlist)+1); // expected-warning{{Untrusted data is used to specify the buffer size}}
+                                                // expected-note@-1{{Untrusted data is used to specify the buffer size}}
+    return pathbuf;
+  }
+  return 0;
+}
+
+void testReadStdIn(){
+  char buf[1024];
+  fgets(buf, sizeof(buf), stdin);// expected-note {{Taint originated here}}
+  system(buf);// expected-warning {{Untrusted data is passed to a system call}} // expected-note {{Untrusted data is passed to a system call (CERT/STR02-C. Sanitize data passed to complex subsystems)}}
+
+}
Index: clang/test/Analysis/taint-checker-callback-order-without-definition.c
===================================================================
--- clang/test/Analysis/taint-checker-callback-order-without-definition.c
+++ clang/test/Analysis/taint-checker-callback-order-without-definition.c
@@ -13,19 +13,22 @@
 
 void top(const char *fname, char *buf) {
   FILE *fp = fopen(fname, "r"); // Introduce taint.
-  // CHECK:      PreCall<fopen(fname, "r")> prepares tainting arg index: -1
-  // CHECK-NEXT: PostCall<fopen(fname, "r")> actually wants to taint arg index: -1
+  // CHECK:      New taint flow. Flow id: 0
+  // CHECK-NEXT: PreCall<fopen(fname, "r")> prepares tainting arg index: -1 Flow Id: 0
+  // CHECK-NEXT: PostCall<fopen(fname, "r")> actually wants to taint arg index: -1 Flow Id: 0
 
   if (!fp)
     return;
 
   (void)fgets(buf, 42, fp); // Trigger taint propagation.
 
-  // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: -1
-  // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: 0
-  // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: 2
+  // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: -1 Flow Id: 0
+  // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: 0 Flow Id: 0
+  // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: 0 Flow Id: 0
   //
-  // CHECK-NEXT: PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: -1
-  // CHECK-NEXT: PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: 0
-  // CHECK-NEXT: PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: 2
+  // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: 2 Flow Id: 0
+  // CHECK-NEXT: PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: -1 Flow Id: 0
+  // CHECK-NEXT: PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: 0 Flow Id: 0
+  // PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: 2 Flow Id: 0
+
 }
Index: clang/test/Analysis/taint-checker-callback-order-has-definition.c
===================================================================
--- clang/test/Analysis/taint-checker-callback-order-has-definition.c
+++ clang/test/Analysis/taint-checker-callback-order-has-definition.c
@@ -18,18 +18,21 @@
 
 void top(const char *fname, char *buf) {
   FILE *fp = fopen(fname, "r");
-  // CHECK:      PreCall<fopen(fname, "r")> prepares tainting arg index: -1
-  // CHECK-NEXT: PostCall<fopen(fname, "r")> actually wants to taint arg index: -1
+  // CHECK:      New taint flow. Flow id: 0
+  // CHECK-NEXT: PreCall<fopen(fname, "r")> prepares tainting arg index: -1 Flow Id: 0
+  // CHECK-NEXT: PostCall<fopen(fname, "r")> actually wants to taint arg index: -1 Flow Id: 0
 
   if (!fp)
     return;
 
   (void)fgets(buf, 42, fp); // Trigger taint propagation.
-  // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: -1
-  // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: 0
-  // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: 2
-  //
-  // CHECK-NEXT: PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: -1
-  // CHECK-NEXT: PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: 0
-  // CHECK-NEXT: PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: 2
+ // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: -1 Flow Id: 0
+ // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: 0 Flow Id: 0
+ // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: 0 Flow Id: 0
+ // CHECK-NEXT: PreCall<fgets(buf, 42, fp)> prepares tainting arg index: 2 Flow Id: 0
+ // CHECK-NEXT: PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: -1 Flow Id: 0
+ // CHECK-NEXT: PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: 0 Flow Id: 0
+ // CHECK-NEXT: PostCall<fgets(buf, 42, fp)> actually wants to taint arg index: 2 Flow Id: 0
 }
+
+
Index: clang/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c
===================================================================
--- clang/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c
+++ clang/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c
@@ -5,7 +5,7 @@
 
 void f(void) {
   char s[80];
-  scanf("%s", s);
+  scanf("%s", s); 
   int d = atoi(s); // expected-warning {{tainted}}
 }
 
Index: clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
===================================================================
--- clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
+++ clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-multi-diagnostic-test.c.sarif
@@ -44,7 +44,7 @@
                     {
                       "importance": "essential",
                       "location": {
-                        "message": {
+                        "message": {         
                           "text": "tainted"
                         },
                         "physicalLocation": {
Index: clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
===================================================================
--- clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
+++ clang/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
@@ -4,7 +4,7 @@
     {
       "artifacts": [
         {
-          "length": 434,
+          "length": 435,
           "location": {
             "index": 0,
           },
Index: clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
+++ clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp
@@ -23,6 +23,7 @@
 const char *const CXXMoveSemantics = "C++ move semantics";
 const char *const SecurityError = "Security error";
 const char *const UnusedCode = "Unused code";
+const char *const TaintedData = "Tainted data used";
 } // namespace categories
 } // namespace ento
 } // namespace clang
Index: clang/lib/StaticAnalyzer/Core/BugReporter.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/BugReporter.cpp
+++ clang/lib/StaticAnalyzer/Core/BugReporter.cpp
@@ -2130,8 +2130,8 @@
 PathSensitiveBugReport::PathSensitiveBugReport(
     const BugType &bt, StringRef shortDesc, StringRef desc,
     const ExplodedNode *errorNode, PathDiagnosticLocation LocationToUnique,
-    const Decl *DeclToUnique)
-    : BugReport(Kind::PathSensitive, bt, shortDesc, desc), ErrorNode(errorNode),
+    const Decl *DeclToUnique, BugReport::Kind kind)
+    : BugReport(kind, bt, shortDesc, desc), ErrorNode(errorNode),
       ErrorNodeRange(getStmt() ? getStmt()->getSourceRange() : SourceRange()),
       UniqueingLocation(LocationToUnique), UniqueingDecl(DeclToUnique) {
   assert(!isDependency(ErrorNode->getState()
Index: clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp
@@ -167,8 +167,7 @@
 
   // Check if the size is tainted.
   if (isTainted(State, SizeV)) {
-    reportBug(VLA_Tainted, SizeE, nullptr, C,
-              std::make_unique<TaintBugVisitor>(SizeV));
+    reportBug(VLA_Tainted, SizeE, nullptr, C);
     return nullptr;
   }
 
@@ -217,9 +216,16 @@
   if (!N)
     return;
 
-  if (!BT)
-    BT.reset(new BuiltinBug(
-        this, "Dangerous variable-length array (VLA) declaration"));
+  if (!BT) {
+    if (Kind == VLA_Tainted)
+      BT.reset(new BugType(this,
+                           "Dangerous variable-length array (VLA) declaration",
+                           categories::TaintedData));
+    else
+      BT.reset(new BugType(this,
+                           "Dangerous variable-length array (VLA) declaration",
+                           categories::LogicError));
+  }
 
   SmallString<256> buf;
   llvm::raw_svector_ostream os(buf);
@@ -242,7 +248,12 @@
     break;
   }
 
-  auto report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N);
+  std::optional<TaintData> TD = std::nullopt;
+  if (Kind == VLA_Tainted)
+    TD = getTaintDataPointeeOrPointer(C, C.getSVal(SizeE));
+
+  auto report = TD ? std::make_unique<TaintBugReport>(*BT, os.str(), N, *TD)
+                   : std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N);
   report->addVisitor(std::move(Visitor));
   report->addRange(SizeE->getSourceRange());
   bugreporter::trackExpressionValue(N, SizeE, *report);
Index: clang/lib/StaticAnalyzer/Checkers/Taint.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/Taint.cpp
+++ clang/lib/StaticAnalyzer/Checkers/Taint.cpp
@@ -13,18 +13,17 @@
 #include "clang/StaticAnalyzer/Checkers/Taint.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
-#include <optional>
 
 using namespace clang;
 using namespace ento;
 using namespace taint;
 
 // Fully tainted symbols.
-REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)
+REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintData)
 
 // Partially tainted symbols.
 REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
-                                       TaintTagType)
+                                       TaintData)
 REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
 
 void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL,
@@ -43,16 +42,14 @@
 }
 
 ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S,
-                                const LocationContext *LCtx,
-                                TaintTagType Kind) {
-  return addTaint(State, State->getSVal(S, LCtx), Kind);
+                                const LocationContext *LCtx, TaintData Data) {
+  return addTaint(State, State->getSVal(S, LCtx), Data);
 }
 
-ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V,
-                                TaintTagType Kind) {
+ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, TaintData Data) {
   SymbolRef Sym = V.getAsSymbol();
   if (Sym)
-    return addTaint(State, Sym, Kind);
+    return addTaint(State, Sym, Data);
 
   // If the SVal represents a structure, try to mass-taint all values within the
   // structure. For now it only works efficiently on lazy compound values that
@@ -68,29 +65,29 @@
             State->getStateManager().getStoreManager().getDefaultBinding(
                 *LCV)) {
       if (SymbolRef Sym = binding->getAsSymbol())
-        return addPartialTaint(State, Sym, LCV->getRegion(), Kind);
+        return addPartialTaint(State, Sym, LCV->getRegion(), Data);
     }
   }
 
   const MemRegion *R = V.getAsRegion();
-  return addTaint(State, R, Kind);
+  return addTaint(State, R, Data);
 }
 
 ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R,
-                                TaintTagType Kind) {
+                                TaintData Data) {
   if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(R))
-    return addTaint(State, SR->getSymbol(), Kind);
+    return addTaint(State, SR->getSymbol(), Data);
   return State;
 }
 
 ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym,
-                                TaintTagType Kind) {
+                                TaintData Data) {
   // If this is a symbol cast, remove the cast before adding the taint. Taint
   // is cast agnostic.
   while (const SymbolCast *SC = dyn_cast<SymbolCast>(Sym))
     Sym = SC->getOperand();
 
-  ProgramStateRef NewState = State->set<TaintMap>(Sym, Kind);
+  ProgramStateRef NewState = State->set<TaintMap>(Sym, Data);
   assert(NewState);
   return NewState;
 }
@@ -124,63 +121,43 @@
 ProgramStateRef taint::addPartialTaint(ProgramStateRef State,
                                        SymbolRef ParentSym,
                                        const SubRegion *SubRegion,
-                                       TaintTagType Kind) {
+                                       TaintData Data) {
   // Ignore partial taint if the entire parent symbol is already tainted.
-  if (const TaintTagType *T = State->get<TaintMap>(ParentSym))
-    if (*T == Kind)
+  if (const TaintData *T = State->get<TaintMap>(ParentSym))
+    if (T->Type == Data.Type)
       return State;
 
   // Partial taint applies if only a portion of the symbol is tainted.
   if (SubRegion == SubRegion->getBaseRegion())
-    return addTaint(State, ParentSym, Kind);
+    return addTaint(State, ParentSym, Data);
 
   const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(ParentSym);
   TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>();
   TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap();
 
-  Regs = F.add(Regs, SubRegion, Kind);
+  Regs = F.add(Regs, SubRegion, Data);
   ProgramStateRef NewState = State->set<DerivedSymTaint>(ParentSym, Regs);
   assert(NewState);
   return NewState;
 }
 
-bool taint::isTainted(ProgramStateRef State, const Stmt *S,
-                      const LocationContext *LCtx, TaintTagType Kind) {
+std::optional<TaintData> taint::getTaintData(ProgramStateRef State, const Stmt *S,
+                                        const LocationContext *LCtx) {
   SVal val = State->getSVal(S, LCtx);
-  return isTainted(State, val, Kind);
+  return getTaintData(State, val);
 }
 
-bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
+std::optional<TaintData> taint::getTaintData(ProgramStateRef State, SVal V) {
   if (SymbolRef Sym = V.getAsSymbol())
-    return isTainted(State, Sym, Kind);
+    return getTaintData(State, Sym);
   if (const MemRegion *Reg = V.getAsRegion())
-    return isTainted(State, Reg, Kind);
-  return false;
-}
-
-bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
-                      TaintTagType K) {
-  if (!Reg)
-    return false;
-
-  // Element region (array element) is tainted if either the base or the offset
-  // are tainted.
-  if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg))
-    return isTainted(State, ER->getSuperRegion(), K) ||
-           isTainted(State, ER->getIndex(), K);
-
-  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
-    return isTainted(State, SR->getSymbol(), K);
-
-  if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
-    return isTainted(State, ER->getSuperRegion(), K);
-
-  return false;
+    return getTaintData(State, Reg);
+  return std::nullopt;
 }
 
-bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
+std::optional<TaintData> taint::getTaintData(ProgramStateRef State, SymbolRef Sym) {
   if (!Sym)
-    return false;
+    return std::nullopt;
 
   // Traverse all the symbols this symbol depends on to see if any are tainted.
   for (SymExpr::symbol_iterator SI = Sym->symbol_begin(),
@@ -189,15 +166,14 @@
     if (!isa<SymbolData>(*SI))
       continue;
 
-    if (const TaintTagType *Tag = State->get<TaintMap>(*SI)) {
-      if (*Tag == Kind)
-        return true;
+    if (const TaintData *Data = State->get<TaintMap>(*SI)) {
+      return *Data;
     }
 
     if (const auto *SD = dyn_cast<SymbolDerived>(*SI)) {
       // If this is a SymbolDerived with a tainted parent, it's also tainted.
-      if (isTainted(State, SD->getParentSymbol(), Kind))
-        return true;
+      if (getTaintData(State, SD->getParentSymbol()))
+        return getTaintData(State, SD->getParentSymbol());
 
       // If this is a SymbolDerived with the same parent symbol as another
       // tainted SymbolDerived and a region that's a sub-region of that tainted
@@ -210,46 +186,142 @@
           // complete. For example, this would not currently identify
           // overlapping fields in a union as tainted. To identify this we can
           // check for overlapping/nested byte offsets.
-          if (Kind == I.second && R->isSubRegionOf(I.first))
-            return true;
+          if (R->isSubRegionOf(I.first))
+            return I.second;
         }
       }
     }
 
     // If memory region is tainted, data is also tainted.
     if (const auto *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
-      if (isTainted(State, SRV->getRegion(), Kind))
-        return true;
+      if (getTaintData(State, SRV->getRegion()))
+        return getTaintData(State, SRV->getRegion());
     }
 
     // If this is a SymbolCast from a tainted value, it's also tainted.
     if (const auto *SC = dyn_cast<SymbolCast>(*SI)) {
-      if (isTainted(State, SC->getOperand(), Kind))
-        return true;
+      if (getTaintData(State, SC->getOperand()))
+        return getTaintData(State, SC->getOperand());
     }
   }
+  return std::nullopt;
+}
+
+std::optional<TaintData> taint::getTaintData(ProgramStateRef State,
+                                        const MemRegion *Reg) {
+  if (!Reg)
+    return std::nullopt;
+
+  // Element region (array element) is tainted if either the base or the offset
+  // are tainted.
+  if (const ElementRegion *ER = dyn_cast<ElementRegion>(Reg)) {
+    if (getTaintData(State, ER->getSuperRegion()))
+      return getTaintData(State, ER->getSuperRegion());
+    else
+      return getTaintData(State, ER->getIndex());
+  }
+
+  if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Reg))
+    return getTaintData(State, SR->getSymbol());
+
+  if (const SubRegion *ER = dyn_cast<SubRegion>(Reg))
+    return getTaintData(State, ER->getSuperRegion());
+
+  return std::nullopt;
+}
 
+bool taint::isStdin(SVal Val, const ASTContext &ACtx) {
+  // FIXME: What if Val is NonParamVarRegion?
+
+  // The region should be symbolic, we do not know it's value.
+  const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion());
+  if (!SymReg)
+    return false;
+
+  // Get it's symbol and find the declaration region it's pointing to.
+  const auto *DeclReg =
+      dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
+  if (!DeclReg)
+    return false;
+
+  // This region corresponds to a declaration, find out if it's a global/extern
+  // variable named stdin with the proper type.
+  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
+    D = D->getCanonicalDecl();
+    // FIXME: This should look for an exact match.
+    if (D->getName().contains("stdin") && D->isExternC()) {
+      const QualType FILETy = ACtx.getFILEType().getCanonicalType();
+      const QualType Ty = D->getType().getCanonicalType();
+
+      if (Ty->isPointerType())
+        return Ty->getPointeeType() == FILETy;
+    }
+  }
   return false;
 }
 
-PathDiagnosticPieceRef TaintBugVisitor::VisitNode(const ExplodedNode *N,
-                                                  BugReporterContext &BRC,
-                                                  PathSensitiveBugReport &BR) {
+std::optional<TaintData> taint::getTaintDataPointeeOrPointer(const CheckerContext &C,
+                                                        SVal Arg) {
+  const ProgramStateRef State = C.getState();
+
+  if (auto Pointee = getPointeeOf(C, Arg))
+    if (isTainted(State, *Pointee))
+      return getTaintData(State, *Pointee);
+
+  if (isTainted(State, Arg))
+    return getTaintData(State, Arg);
+
+  return std::nullopt;
+}
+
+std::optional<TaintData>
+taint::getTaintDataPointeeOrPointer(const Expr *E, const ProgramStateRef &State,
+                                    CheckerContext &C) {
+  return getTaintDataPointeeOrPointer(C, C.getSVal(E));
+}
 
-  // Find the ExplodedNode where the taint was first introduced
-  if (!isTainted(N->getState(), V) ||
-      isTainted(N->getFirstPred()->getState(), V))
-    return nullptr;
+SVal taint::getPointeeOf(const CheckerContext &C, Loc LValue) {
+  const QualType ArgTy = LValue.getType(C.getASTContext());
+  if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
+    return C.getState()->getSVal(LValue);
 
-  const Stmt *S = N->getStmtForDiagnostics();
-  if (!S)
-    return nullptr;
+  // Do not dereference void pointers. Treat them as byte pointers instead.
+  // FIXME: we might want to consider more than just the first byte.
+  return C.getState()->getSVal(LValue, C.getASTContext().CharTy);
+}
+
+/// Given a pointer/reference argument, return the value it refers to.
+std::optional<SVal> taint::getPointeeOf(const CheckerContext &C, SVal Arg) {
+  if (auto LValue = Arg.getAs<Loc>())
+    return getPointeeOf(C, *LValue);
+  return std::nullopt;
+}
+
+bool taint::isTainted(ProgramStateRef State, const Stmt *S,
+                      const LocationContext *LCtx, TaintTagType Kind) {
+  SVal val = State->getSVal(S, LCtx);
+  return isTainted(State, val, Kind);
+}
 
-  const LocationContext *NCtx = N->getLocationContext();
-  PathDiagnosticLocation L =
-      PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
-  if (!L.isValid() || !L.asLocation().isValid())
-    return nullptr;
+bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) {
+  if (SymbolRef Sym = V.getAsSymbol())
+    return isTainted(State, Sym, Kind);
+  if (const MemRegion *Reg = V.getAsRegion())
+    return isTainted(State, Reg, Kind);
+  return false;
+}
 
-  return std::make_shared<PathDiagnosticEventPiece>(L, "Taint originated here");
+bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg,
+                      TaintTagType Kind) {
+  if (!Reg)
+    return false;
+  std::optional<TaintData> TD = getTaintData(State, Reg);
+  return TD ? (TD->Type == Kind) : false;
 }
+
+bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) {
+  if (!Sym)
+    return false;
+  std::optional<TaintData> TD = getTaintData(State, Sym);
+  return TD ? (TD->Type == Kind) : false;
+}
\ No newline at end of file
Index: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -25,7 +25,9 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
+
 #include "llvm/Support/YAMLTraits.h"
 
 #include <limits>
@@ -71,6 +73,25 @@
 using ArgIdxTy = int;
 using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;
 
+/// In this structure we follow the spread of the taint flows through
+/// the function arguments.
+struct ArgIdxFlowTy {
+  ArgIdxTy ArgIdx;
+  int FlowId;
+  ArgIdxFlowTy(ArgIdxTy i) : ArgIdx(i), FlowId(UninitializedFlow){};
+  ArgIdxFlowTy(ArgIdxTy i, int f) : ArgIdx(i), FlowId(f){};
+  bool operator==(const ArgIdxFlowTy &X) const {
+    return FlowId == X.FlowId && ArgIdx == X.ArgIdx;
+  }
+  int operator<(const ArgIdxFlowTy &X) const {
+    return FlowId < X.FlowId && ArgIdx < X.ArgIdx;
+  }
+  void Profile(llvm::FoldingSetNodeID &ID) const {
+    ID.AddInteger(FlowId);
+    ID.AddInteger(ArgIdx);
+  }
+};
+
 /// Denotes the return value.
 constexpr ArgIdxTy ReturnValueIndex{-1};
 
@@ -81,56 +102,6 @@
   return Count;
 }
 
-/// Check if the region the expression evaluates to is the standard input,
-/// and thus, is tainted.
-/// FIXME: Move this to Taint.cpp.
-bool isStdin(SVal Val, const ASTContext &ACtx) {
-  // FIXME: What if Val is NonParamVarRegion?
-
-  // The region should be symbolic, we do not know it's value.
-  const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion());
-  if (!SymReg)
-    return false;
-
-  // Get it's symbol and find the declaration region it's pointing to.
-  const auto *DeclReg =
-      dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
-  if (!DeclReg)
-    return false;
-
-  // This region corresponds to a declaration, find out if it's a global/extern
-  // variable named stdin with the proper type.
-  if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
-    D = D->getCanonicalDecl();
-    // FIXME: This should look for an exact match.
-    if (D->getName().contains("stdin") && D->isExternC()) {
-      const QualType FILETy = ACtx.getFILEType().getCanonicalType();
-      const QualType Ty = D->getType().getCanonicalType();
-
-      if (Ty->isPointerType())
-        return Ty->getPointeeType() == FILETy;
-    }
-  }
-  return false;
-}
-
-SVal getPointeeOf(const CheckerContext &C, Loc LValue) {
-  const QualType ArgTy = LValue.getType(C.getASTContext());
-  if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
-    return C.getState()->getSVal(LValue);
-
-  // Do not dereference void pointers. Treat them as byte pointers instead.
-  // FIXME: we might want to consider more than just the first byte.
-  return C.getState()->getSVal(LValue, C.getASTContext().CharTy);
-}
-
-/// Given a pointer/reference argument, return the value it refers to.
-std::optional<SVal> getPointeeOf(const CheckerContext &C, SVal Arg) {
-  if (auto LValue = Arg.getAs<Loc>())
-    return getPointeeOf(C, *LValue);
-  return std::nullopt;
-}
-
 /// Given a pointer, return the SVal of its pointee or if it is tainted,
 /// otherwise return the pointer's SVal if tainted.
 /// Also considers stdin as a taint source.
@@ -182,6 +153,27 @@
   std::optional<ArgIdxTy> VariadicIndex;
 };
 
+const NoteTag *createTaintOriginTag(CheckerContext &C, int FlowId) {
+  LLVM_DEBUG(llvm::dbgs() << "New taint flow. Flow id: " << FlowId << '\n';);
+  const NoteTag *InjectionTag =
+      C.getNoteTag([&C, FlowId](PathSensitiveBugReport &BR) -> std::string {
+        SmallString<256> Msg;
+        llvm::raw_svector_ostream Out(Msg);
+        // Only make this note when the report is taint related and the flow
+        // id of the origin matches with the flow id of the sink.
+        // We are not using "interestingess" for this as it is not
+        // propagating the same way as taintedness property.
+        if (TaintBugReport *TBR = dyn_cast_or_null<TaintBugReport>(&BR))
+          if (TBR->taint_data.Flow == FlowId) {
+            LLVM_DEBUG(llvm::dbgs() << "Taint originated here. Flow id: "
+                                    << std::to_string(FlowId) << "\n");
+            Out << "Taint originated here";
+          }
+        return std::string(Out.str());
+      });
+  return InjectionTag;
+}
+
 /// A struct used to specify taint propagation rules for a function.
 ///
 /// If any of the possible taint source arguments is tainted, all of the
@@ -193,7 +185,7 @@
   ArgSet SinkArgs;
   /// Arguments which should be sanitized on function return.
   ArgSet FilterArgs;
-  /// Arguments which can participate in taint propagationa. If any of the
+  /// Arguments which can participate in taint propagation. If any of the
   /// arguments in PropSrcArgs is tainted, all arguments in  PropDstArgs should
   /// be tainted.
   ArgSet PropSrcArgs;
@@ -334,7 +326,6 @@
 public:
   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
-
   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
                   const char *Sep) const override;
 
@@ -342,8 +333,13 @@
   bool generateReportIfTainted(const Expr *E, StringRef Msg,
                                CheckerContext &C) const;
 
+  static TaintFlowType generateNewTaintFlow() { return FlowIdCount++; }
+
 private:
-  const BugType BT{this, "Use of Untrusted Data", "Untrusted Data"};
+  /// A unique counter for each taint flow.
+  /// We must increase this whenever the analysis hits a new taint source.
+  static TaintFlowType FlowIdCount;
+  const BugType BT{this, "Use of Untrusted Data", categories::TaintedData};
 
   bool checkUncontrolledFormatString(const CallEvent &Call,
                                      CheckerContext &C) const;
@@ -365,6 +361,8 @@
   mutable std::optional<RuleLookupTy> StaticTaintRules;
   mutable std::optional<RuleLookupTy> DynamicTaintRules;
 };
+TaintFlowType GenericTaintChecker::FlowIdCount = 0;
+
 } // end of anonymous namespace
 
 /// YAML serialization mapping.
@@ -424,8 +422,8 @@
 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
 /// points to data, which should be tainted on return.
 REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *,
-                               ImmutableSet<ArgIdxTy>)
-REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy)
+                               ImmutableSet<ArgIdxFlowTy>)
+REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxFlowTy)
 
 void GenericTaintRuleParser::validateArgVector(const std::string &Option,
                                                const ArgVecTy &Args) const {
@@ -776,29 +774,32 @@
   // stored in the state as TaintArgsOnPostVisit set.
   TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
 
-  const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
+  const ImmutableSet<ArgIdxFlowTy> *TaintArgs =
+      TaintArgsMap.lookup(CurrentFrame);
   if (!TaintArgs)
     return;
   assert(!TaintArgs->isEmpty());
 
-  LLVM_DEBUG(for (ArgIdxTy I
+  LLVM_DEBUG(for (ArgIdxFlowTy I
                   : *TaintArgs) {
     llvm::dbgs() << "PostCall<";
     Call.dump(llvm::dbgs());
-    llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n';
+    llvm::dbgs() << "> actually wants to taint arg index: " << I.ArgIdx
+                 << " Flow Id: " << I.FlowId << "\n";
   });
 
-  for (ArgIdxTy ArgNum : *TaintArgs) {
+  for (ArgIdxFlowTy ArgNum : *TaintArgs) {
     // Special handling for the tainted return value.
-    if (ArgNum == ReturnValueIndex) {
-      State = addTaint(State, Call.getReturnValue());
+    if (ArgNum.ArgIdx == ReturnValueIndex) {
+      State = addTaint(State, Call.getReturnValue(), ArgNum.FlowId);
       continue;
     }
 
     // The arguments are pointer arguments. The data they are pointing at is
     // tainted after the call.
-    if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum)))
-      State = addTaint(State, *V);
+    if (auto V = getPointeeOf(C, Call.getArgSVal(ArgNum.ArgIdx))) {
+      State = addTaint(State, *V, ArgNum.FlowId);
+    }
   }
 
   // Clear up the taint info from the state.
@@ -826,8 +827,13 @@
 
   /// Check for taint sinks.
   ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) {
-    if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C))
+    if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(E, State, C)) {
+      LLVM_DEBUG(llvm::dbgs()
+                     << "Potential Injection vulnerability detected. Flow id: "
+                     << getTaintDataPointeeOrPointer(C, C.getSVal(E))->Flow
+                     << '\n';);
       Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C);
+    }
   });
 
   /// Check for taint filters.
@@ -843,11 +849,31 @@
   /// A rule is relevant if PropSrcArgs is empty, or if any of its signified
   /// args are tainted in context of the current CallEvent.
   bool IsMatching = PropSrcArgs.isEmpty();
-  ForEachCallArg(
-      [this, &C, &IsMatching, &State](ArgIdxTy I, const Expr *E, SVal) {
-        IsMatching = IsMatching || (PropSrcArgs.contains(I) &&
-                                    isTaintedOrPointsToTainted(E, State, C));
-      });
+  TaintFlowType TaintFlowId = UninitializedFlow;
+
+  const NoteTag *InjectionTag = nullptr;
+  if (PropSrcArgs.isEmpty() && !PropDstArgs.isEmpty()) {
+    // The attacker injects the taint value here
+    // We place here a noteTag and assign a new Flow id.
+    TaintFlowId = GenericTaintChecker::generateNewTaintFlow();
+    InjectionTag = createTaintOriginTag(C, TaintFlowId);
+  }
+
+  ForEachCallArg([this, &C, &IsMatching, &State, &TaintFlowId,
+                  &InjectionTag](ArgIdxTy I, const Expr *E, SVal) mutable {
+    IsMatching = IsMatching || (PropSrcArgs.contains(I) &&
+                                isTaintedOrPointsToTainted(E, State, C));
+    if (TaintFlowId == UninitializedFlow) {
+      if (isStdin(C.getSVal(E), C.getASTContext())) {
+        // Reading from stdin. Creating a new flow
+        TaintFlowId = GenericTaintChecker::generateNewTaintFlow();
+        InjectionTag = createTaintOriginTag(C, TaintFlowId);
+      } else if (getTaintDataPointeeOrPointer(C, C.getSVal(E)).has_value()) {
+        // Propagating an existing Flow Id
+        TaintFlowId = getTaintDataPointeeOrPointer(C, C.getSVal(E))->Flow;
+      }
+    }
+  });
 
   if (!IsMatching)
     return;
@@ -865,32 +891,34 @@
 
   /// Propagate taint where it is necessary.
   auto &F = State->getStateManager().get_context<ArgIdxFactory>();
-  ImmutableSet<ArgIdxTy> Result = F.getEmptySet();
-  ForEachCallArg(
-      [&](ArgIdxTy I, const Expr *E, SVal V) {
-        if (PropDstArgs.contains(I)) {
-          LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs());
-                     llvm::dbgs()
-                     << "> prepares tainting arg index: " << I << '\n';);
-          Result = F.add(Result, I);
-        }
-
-        // TODO: We should traverse all reachable memory regions via the
-        // escaping parameter. Instead of doing that we simply mark only the
-        // referred memory region as tainted.
-        if (WouldEscape(V, E->getType())) {
-          LLVM_DEBUG(if (!Result.contains(I)) {
-            llvm::dbgs() << "PreCall<";
-            Call.dump(llvm::dbgs());
-            llvm::dbgs() << "> prepares tainting arg index: " << I << '\n';
-          });
-          Result = F.add(Result, I);
-        }
+  ImmutableSet<ArgIdxFlowTy> Result = F.getEmptySet();
+  ForEachCallArg([&](ArgIdxTy I, const Expr *E, SVal V) {
+    if (PropDstArgs.contains(I)) {
+      LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs());
+                 llvm::dbgs() << "> prepares tainting arg index: " << I
+                              << " Flow Id: " << TaintFlowId << '\n';);
+      ArgIdxFlowTy Flow(I, TaintFlowId);
+      Result = F.add(Result, Flow);
+    }
+
+    // TODO: We should traverse all reachable memory regions via the
+    // escaping parameter. Instead of doing that we simply mark only the
+    // referred memory region as tainted.
+    if (WouldEscape(V, E->getType())) {
+      LLVM_DEBUG(if (!Result.contains(I)) {
+        llvm::dbgs() << "PreCall<";
+        Call.dump(llvm::dbgs());
+        llvm::dbgs() << "> prepares tainting arg index: " << I
+                     << " Flow Id: " << TaintFlowId << '\n';
       });
+      ArgIdxFlowTy Flow(I, TaintFlowId);
+      Result = F.add(Result, Flow);
+    }
+  });
 
   if (!Result.isEmpty())
     State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
-  C.addTransition(State);
+  C.addTransition(State, InjectionTag);
 }
 
 bool GenericTaintRule::UntrustedEnv(CheckerContext &C) {
@@ -902,16 +930,14 @@
 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
                                                   CheckerContext &C) const {
   assert(E);
-  std::optional<SVal> TaintedSVal{getTaintedPointeeOrPointer(C, C.getSVal(E))};
+  std::optional<TaintData> TD = getTaintDataPointeeOrPointer(C, C.getSVal(E));
 
-  if (!TaintedSVal)
+  if (!TD) {
     return false;
-
+  }
   // Generate diagnostic.
   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
-    auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
-    report->addRange(E->getSourceRange());
-    report->addVisitor(std::make_unique<TaintBugVisitor>(*TaintedSVal));
+    auto report = std::make_unique<TaintBugReport>(BT, Msg, N, *TD);
     C.emitReport(std::move(report));
     return true;
   }
@@ -982,7 +1008,8 @@
 
   ProgramStateRef State = C.getState();
   auto &F = State->getStateManager().get_context<ArgIdxFactory>();
-  ImmutableSet<ArgIdxTy> Result = F.add(F.getEmptySet(), ReturnValueIndex);
+  ImmutableSet<ArgIdxFlowTy> Result =
+      F.add(F.getEmptySet(), ArgIdxFlowTy(ReturnValueIndex));
   State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
   C.addTransition(State);
 }
Index: clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/DivZeroChecker.cpp
@@ -25,8 +25,9 @@
 
 namespace {
 class DivZeroChecker : public Checker< check::PreStmt<BinaryOperator> > {
-  mutable std::unique_ptr<BuiltinBug> BT;
-  void reportBug(const char *Msg, ProgramStateRef StateZero, CheckerContext &C,
+  mutable std::unique_ptr<BugType> BT;
+  void reportBug(const char *Msg, std::string Category,
+                 ProgramStateRef StateZero, CheckerContext &C,
                  std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
 
 public:
@@ -42,13 +43,19 @@
 }
 
 void DivZeroChecker::reportBug(
-    const char *Msg, ProgramStateRef StateZero, CheckerContext &C,
-    std::unique_ptr<BugReporterVisitor> Visitor) const {
+    const char *Msg, std::string Category, ProgramStateRef StateZero,
+    CheckerContext &C, std::unique_ptr<BugReporterVisitor> Visitor) const {
   if (ExplodedNode *N = C.generateErrorNode(StateZero)) {
     if (!BT)
-      BT.reset(new BuiltinBug(this, "Division by zero"));
+      BT.reset(new BugType(this, "Division by zero", Category));
 
-    auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
+    std::optional<TaintData> TD = std::nullopt;
+    if (Category == categories::TaintedData)
+      TD = getTaintDataPointeeOrPointer(C, C.getSVal(getDenomExpr(N)));
+
+    auto R = TD ? std::make_unique<TaintBugReport>(*BT, Msg, N, *TD)
+                : std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
+    std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
     R->addVisitor(std::move(Visitor));
     bugreporter::trackExpressionValue(N, getDenomExpr(N), *R);
     C.emitReport(std::move(R));
@@ -82,14 +89,14 @@
 
   if (!stateNotZero) {
     assert(stateZero);
-    reportBug("Division by zero", stateZero, C);
+    reportBug("Division by zero", categories::LogicError, stateZero, C);
     return;
   }
 
   bool TaintedD = isTainted(C.getState(), *DV);
   if ((stateNotZero && stateZero && TaintedD)) {
-    reportBug("Division by a tainted value, possibly zero", stateZero, C,
-              std::make_unique<taint::TaintBugVisitor>(*DV));
+    reportBug("Division by a tainted value, possibly zero",
+              categories::TaintedData, stateZero, C);
     return;
   }
 
Index: clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
+++ clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
@@ -32,12 +32,12 @@
 namespace {
 class ArrayBoundCheckerV2 :
     public Checker<check::Location> {
-  mutable std::unique_ptr<BuiltinBug> BT;
+  mutable std::unique_ptr<BugType> BT;
 
   enum OOB_Kind { OOB_Precedes, OOB_Excedes, OOB_Tainted };
 
   void reportOOB(CheckerContext &C, ProgramStateRef errorState, OOB_Kind kind,
-                 std::unique_ptr<BugReporterVisitor> Visitor = nullptr) const;
+                 SVal &Offset) const;
 
 public:
   void checkLocation(SVal l, bool isLoad, const Stmt*S,
@@ -166,7 +166,8 @@
 
     // Are we constrained enough to definitely precede the lower bound?
     if (state_precedesLowerBound && !state_withinLowerBound) {
-      reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
+      reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes,
+                rawOffsetVal);
       return;
     }
 
@@ -208,14 +209,15 @@
       SVal ByteOffset = rawOffset.getByteOffset();
       if (isTainted(state, ByteOffset)) {
         reportOOB(checkerContext, state_exceedsUpperBound, OOB_Tainted,
-                  std::make_unique<TaintBugVisitor>(ByteOffset));
+                  rawOffsetVal);
         return;
       }
     } else if (state_exceedsUpperBound) {
       // If we are constrained enough to definitely exceed the upper bound,
       // report.
       assert(!state_withinUpperBound);
-      reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes);
+      reportOOB(checkerContext, state_exceedsUpperBound, OOB_Excedes,
+                rawOffsetVal);
       return;
     }
 
@@ -227,16 +229,22 @@
   checkerContext.addTransition(state);
 }
 
-void ArrayBoundCheckerV2::reportOOB(
-    CheckerContext &checkerContext, ProgramStateRef errorState, OOB_Kind kind,
-    std::unique_ptr<BugReporterVisitor> Visitor) const {
+void ArrayBoundCheckerV2::reportOOB(CheckerContext &checkerContext,
+                                    ProgramStateRef errorState, OOB_Kind kind,
+                                    SVal &Offset) const {
 
   ExplodedNode *errorNode = checkerContext.generateErrorNode(errorState);
   if (!errorNode)
     return;
 
-  if (!BT)
-    BT.reset(new BuiltinBug(this, "Out-of-bound access"));
+  if (!BT) {
+    if (kind != OOB_Tainted)
+      BT.reset(
+          new BugType(this, "Out-of-bound access", categories::LogicError));
+    else
+      BT.reset(
+          new BugType(this, "Out-of-bound access", categories::TaintedData));
+  }
 
   // FIXME: This diagnostics are preliminary.  We should get far better
   // diagnostics for explaining buffer overruns.
@@ -255,9 +263,13 @@
     os << "(index is tainted)";
     break;
   }
+  std::optional<TaintData> TD = std::nullopt;
+  if (kind == OOB_Tainted)
+    TD = getTaintDataPointeeOrPointer(checkerContext, Offset);
 
-  auto BR = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
-  BR->addVisitor(std::move(Visitor));
+  auto BR =
+      TD ? std::make_unique<TaintBugReport>(*BT, os.str(), errorNode, *TD)
+         : std::make_unique<PathSensitiveBugReport>(*BT, os.str(), errorNode);
   checkerContext.emitReport(std::move(BR));
 }
 
Index: clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
+++ clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h
@@ -22,6 +22,7 @@
 extern const char *const CXXMoveSemantics;
 extern const char *const SecurityError;
 extern const char *const UnusedCode;
+extern const char *const TaintedData;
 } // namespace categories
 } // namespace ento
 } // namespace clang
Index: clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
+++ clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h
@@ -118,7 +118,7 @@
 /// individual bug reports.
 class BugReport {
 public:
-  enum class Kind { Basic, PathSensitive };
+  enum class Kind { Basic, PathSensitive, TaintPathSensitive };
 
 protected:
   friend class BugReportEquivClass;
@@ -367,14 +367,16 @@
 
 public:
   PathSensitiveBugReport(const BugType &bt, StringRef desc,
-                         const ExplodedNode *errorNode)
-      : PathSensitiveBugReport(bt, desc, desc, errorNode) {}
+                         const ExplodedNode *errorNode,
+                         BugReport::Kind kind = Kind::PathSensitive)
+      : PathSensitiveBugReport(bt, desc, desc, errorNode, kind) {}
 
   PathSensitiveBugReport(const BugType &bt, StringRef shortDesc, StringRef desc,
-                         const ExplodedNode *errorNode)
+                         const ExplodedNode *errorNode,
+                         BugReport::Kind kind = Kind::PathSensitive)
       : PathSensitiveBugReport(bt, shortDesc, desc, errorNode,
                                /*LocationToUnique*/ {},
-                               /*DeclToUnique*/ nullptr) {}
+                               /*DeclToUnique*/ nullptr, kind) {}
 
   /// Create a PathSensitiveBugReport with a custom uniqueing location.
   ///
@@ -386,17 +388,20 @@
   PathSensitiveBugReport(const BugType &bt, StringRef desc,
                          const ExplodedNode *errorNode,
                          PathDiagnosticLocation LocationToUnique,
-                         const Decl *DeclToUnique)
+                         const Decl *DeclToUnique,
+                         BugReport::Kind kind = Kind::PathSensitive)
       : PathSensitiveBugReport(bt, desc, desc, errorNode, LocationToUnique,
-                               DeclToUnique) {}
+                               DeclToUnique, kind) {}
 
   PathSensitiveBugReport(const BugType &bt, StringRef shortDesc, StringRef desc,
                          const ExplodedNode *errorNode,
                          PathDiagnosticLocation LocationToUnique,
-                         const Decl *DeclToUnique);
+                         const Decl *DeclToUnique,
+                         BugReport::Kind kind = Kind::PathSensitive);
 
   static bool classof(const BugReport *R) {
-    return R->getKind() == Kind::PathSensitive;
+    return R->getKind() >= Kind::PathSensitive &&
+           R->getKind() <= Kind::TaintPathSensitive;
   }
 
   const ExplodedNode *getErrorNode() const { return ErrorNode; }
Index: clang/include/clang/StaticAnalyzer/Checkers/Taint.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Checkers/Taint.h
+++ clang/include/clang/StaticAnalyzer/Checkers/Taint.h
@@ -13,7 +13,9 @@
 #ifndef LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H
 #define LLVM_CLANG_LIB_STATICANALYZER_CHECKERS_TAINT_H
 
+#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 
 namespace clang {
@@ -23,27 +25,48 @@
 /// The type of taint, which helps to differentiate between different types of
 /// taint.
 using TaintTagType = unsigned;
+using TaintFlowType = int;
 
 static constexpr TaintTagType TaintTagGeneric = 0;
+static constexpr TaintFlowType UninitializedFlow = -1;
+
+/// Used to store the taint flow related information
+/// in the program state.
+struct TaintData {
+  TaintTagType Type; // Type of the taintedness (e.g. sql escape checked)
+  TaintFlowType Flow; // Unique identifier of the taint flow
+  TaintData() : Type(TaintTagGeneric), Flow(UninitializedFlow){};
+  TaintData(TaintFlowType f) : Type(TaintTagGeneric), Flow(f){};
+  TaintData(TaintTagType t, TaintFlowType f) : Type(t), Flow(f){};
+  bool operator==(const TaintData &X) const { return Flow == X.Flow; }
+  int operator<(const TaintData &X) const { return Flow < X.Flow; }
+  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Flow); }
+};
+
+static inline raw_ostream &operator<<(llvm::raw_ostream &Out,
+                                      const clang::ento::taint::TaintData t) {
+  Out << "Type:" << t.Type << " Flow:" << t.Flow << "\n";
+  return Out;
+}
 
 /// Create a new state in which the value of the statement is marked as tainted.
 [[nodiscard]] ProgramStateRef addTaint(ProgramStateRef State, const Stmt *S,
                                        const LocationContext *LCtx,
-                                       TaintTagType Kind = TaintTagGeneric);
+                                       TaintData Data = TaintData());
 
 /// Create a new state in which the value is marked as tainted.
 [[nodiscard]] ProgramStateRef addTaint(ProgramStateRef State, SVal V,
-                                       TaintTagType Kind = TaintTagGeneric);
+                                       TaintData Data = TaintData());
 
 /// Create a new state in which the symbol is marked as tainted.
 [[nodiscard]] ProgramStateRef addTaint(ProgramStateRef State, SymbolRef Sym,
-                                       TaintTagType Kind = TaintTagGeneric);
+                                       TaintData Data = TaintData());
 
 /// Create a new state in which the pointer represented by the region
 /// is marked as tainted.
 [[nodiscard]] ProgramStateRef addTaint(ProgramStateRef State,
                                        const MemRegion *R,
-                                       TaintTagType Kind = TaintTagGeneric);
+                                       TaintData Data = TaintData());
 
 [[nodiscard]] ProgramStateRef removeTaint(ProgramStateRef State, SVal V);
 
@@ -56,10 +79,38 @@
 /// This might be necessary when referring to regions that can not have an
 /// individual symbol, e.g. if they are represented by the default binding of
 /// a LazyCompoundVal.
-[[nodiscard]] ProgramStateRef
-addPartialTaint(ProgramStateRef State, SymbolRef ParentSym,
-                const SubRegion *SubRegion,
-                TaintTagType Kind = TaintTagGeneric);
+[[nodiscard]] ProgramStateRef addPartialTaint(ProgramStateRef State,
+                                              SymbolRef ParentSym,
+                                              const SubRegion *SubRegion,
+                                              TaintData = TaintData());
+
+std::optional<TaintData> getTaintData(ProgramStateRef State, const Stmt *S,
+                                 const LocationContext *LCtx);
+
+/// Get the taint data for a value if tainted in the given state.
+std::optional<TaintData> getTaintData(ProgramStateRef State, SVal V);
+
+/// Get the taint data for symbol if tainted in the given state.
+std::optional<TaintData> getTaintData(ProgramStateRef State, SymbolRef Sym);
+
+/// Get the Taint data for a pointer represented by the region
+// if tainted in the given state.
+std::optional<TaintData> getTaintData(ProgramStateRef State, const MemRegion *Reg);
+
+/// Check if the region the expression evaluates to is the standard input,
+/// and thus, is tainted.
+bool isStdin(SVal Val, const ASTContext &ACtx);
+
+/// Returns the TaintData for a pointed value (if tainted), otherwise
+/// the TaintData of the pointer itself.
+std::optional<TaintData> getTaintDataPointeeOrPointer(const CheckerContext &C,
+                                                 SVal Arg);
+std::optional<TaintData> getTaintDataPointeeOrPointer(const Expr *E,
+                                                 const ProgramStateRef &State,
+                                                 CheckerContext &C);
+
+SVal getPointeeOf(const CheckerContext &C, Loc LValue);
+std::optional<SVal> getPointeeOf(const CheckerContext &C, SVal Arg);
 
 /// Check if the statement has a tainted value in the given state.
 bool isTainted(ProgramStateRef State, const Stmt *S,
@@ -84,19 +135,19 @@
 
 LLVM_DUMP_METHOD void dumpTaint(ProgramStateRef State);
 
-/// The bug visitor prints a diagnostic message at the location where a given
-/// variable was tainted.
-class TaintBugVisitor final : public BugReporterVisitor {
-private:
-  const SVal V;
-
+/// Always use TaintBugReport to give taint related warnings in any checker.
+/// The TaintData member carries the flow associated data of the tainted SVal
+class TaintBugReport : public clang::ento::PathSensitiveBugReport {
 public:
-  TaintBugVisitor(const SVal V) : V(V) {}
-  void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); }
-
-  PathDiagnosticPieceRef VisitNode(const ExplodedNode *N,
-                                   BugReporterContext &BRC,
-                                   PathSensitiveBugReport &BR) override;
+  TaintBugReport(const BugType &bt, StringRef desc,
+                 const ExplodedNode *errorNode, TaintData T)
+      : clang::ento::PathSensitiveBugReport(bt, desc, errorNode,
+                                            Kind::TaintPathSensitive),
+        taint_data(T){};
+  static bool classof(const BugReport *R) {
+    return R->getKind() == Kind::TaintPathSensitive;
+  }
+  TaintData taint_data;
 };
 
 } // namespace taint
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to