Issue: https://llvm.org/bugs/show_bug.cgi?id=21124

Patch to improve Clang's static analysis of
__builtin_sprintf_chk so that it will outperform the
analysis of g++.

http://reviews.llvm.org/D9665

Files:
  include/clang/Sema/Sema.h
  lib/Sema/SemaChecking.cpp
  test/SemaCXX/sprintf-chk.cpp

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
Index: include/clang/Sema/Sema.h
===================================================================
--- include/clang/Sema/Sema.h
+++ include/clang/Sema/Sema.h
@@ -8521,6 +8521,10 @@
   ExprResult CheckBuiltinFunctionCall(FunctionDecl *FDecl,
 		                      unsigned BuiltinID, CallExpr *TheCall);
 
+  void CheckSprintfMemAlloc(FunctionDecl *FDecl, CallExpr *TheCall,
+                             unsigned BufIdx, unsigned FmtIdx,
+                             unsigned DataIdx);
+
   bool CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
                                     unsigned MaxWidth);
   bool CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
@@ -8562,6 +8566,7 @@
   enum FormatStringType {
     FST_Scanf,
     FST_Printf,
+    FST_Sprintf,
     FST_NSString,
     FST_Strftime,
     FST_Strfmon,
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -480,6 +480,9 @@
   case Builtin::BI__builtin___stpncpy_chk:
     SemaBuiltinMemChkCall(*this, FDecl, TheCall, 2, 3);
     break;
+  case Builtin::BI__builtin___sprintf_chk:
+    CheckSprintfMemAlloc(FDecl, TheCall, 0, 3, 4);
+    break;
   case Builtin::BI__builtin___memccpy_chk:
     SemaBuiltinMemChkCall(*this, FDecl, TheCall, 3, 4);
     break;
@@ -1317,6 +1320,40 @@
   return false;
 }
 
+void Sema::CheckSprintfMemAlloc(FunctionDecl *FDecl, CallExpr *TheCall,
+                                unsigned BufIdx, unsigned FmtIdx,
+                                unsigned DataIdx) {
+  unsigned NumArgs = TheCall->getNumArgs();
+  if (NumArgs <= BufIdx ||
+      NumArgs <= FmtIdx ||
+      NumArgs <= DataIdx)
+    return;
+
+  const Expr *FmtArg = TheCall->getArg(FmtIdx);
+  if (!isa<StringLiteral>(FmtArg->IgnoreImplicit()))
+    return;
+
+  const FunctionProtoType *Proto =
+    FDecl->getType()->castAs<FunctionProtoType>();
+  VariadicCallType CallType = getVariadicCallType(FDecl, Proto,
+                                                    TheCall->getCallee());
+  llvm::ArrayRef<const Expr*> Args =
+    llvm::makeArrayRef(TheCall->getArgs(), NumArgs);
+  llvm::SmallBitVector CheckedVarArgs;
+  if (FDecl) {
+    for (const auto *I : FDecl->specific_attrs<FormatAttr>()) {
+      // Only create vector if there are format attributes.
+      CheckedVarArgs.resize(Args.size());
+      CheckFormatArguments(I, Args, /*IsMemberFunction=*/false, CallType,
+                           TheCall->getLocStart(), FDecl->getSourceRange(),
+                           CheckedVarArgs);
+    }
+  }
+  const StringLiteral *SL = cast<StringLiteral>(FmtArg->IgnoreImplicit());
+  CheckFormatString(SL, FmtArg, Args, true, FmtIdx,  DataIdx, FST_Sprintf,
+                    /*inFunctionCall=*/true, CallType, CheckedVarArgs);
+}
+
 bool Sema::CheckObjCMethodCall(ObjCMethodDecl *Method, SourceLocation lbrac, 
                                ArrayRef<const Expr *> Args) {
   VariadicCallType CallType =
@@ -3355,7 +3392,7 @@
   bool checkForCStrMembers(const analyze_printf::ArgType &AT,
                            const Expr *E);
 
-};  
+};
 }
 
 bool CheckPrintfHandler::HandleInvalidPrintfConversionSpecifier(
@@ -4050,6 +4087,177 @@
   return true;
 }
 
+static const ConstantArrayType* GetArrayType(Sema &S, const Expr *Array) {
+  if (isa<DeclRefExpr>(Array)) {
+    QualType ArrayTy = cast<DeclRefExpr>(Array)->getDecl()->getType();
+    if (ArrayTy->isConstantArrayType())
+      return S.Context.getAsConstantArrayType(Array->getType());
+  }
+  return nullptr;
+}
+
+// CHECK: Sprintf format string checking, uses printf checking with added data
+// size analysing to warn of memory overflows.
+namespace {
+  class CheckSprintfHandler : public CheckPrintfHandler {
+  public:
+    CheckSprintfHandler(Sema &S, const StringLiteral *FExpr,
+                        const Expr *OrigFormatExpr, unsigned FirstDataArg,
+                        unsigned numDataArgs, bool isObjC,
+                        const char *Begin, bool hasVAListArg,
+                        ArrayRef<const Expr*> Args,
+                        unsigned FormatIdx, bool inFunctionCall,
+                        Sema::VariadicCallType CallType,
+                        llvm::SmallBitVector &CheckedVarArgs)
+      : CheckPrintfHandler(S, FExpr, OrigFormatExpr, FirstDataArg, numDataArgs,
+                           isObjC, Begin, hasVAListArg, Args, FormatIdx,
+                           inFunctionCall, CallType, CheckedVarArgs),
+      Args(Args), S(S) {
+        StorageBufSize = llvm::APInt(64, 0);
+        StorageRequired = llvm::APInt(64, 0);
+      }
+
+    bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
+                               const char *startSpecifier,
+                               unsigned specifierLen) override;
+
+    bool Init() {
+      if (const ConstantArrayType *StorageBufTy =
+        GetArrayType(S, Args[0]->IgnoreImplicit())) {
+        StorageBufSize = StorageBufTy->getSize();
+        return true;
+      }
+      return false;
+    }
+
+  private:
+    ArrayRef<const Expr*> Args;
+    Sema &S;
+    llvm::APInt StorageBufSize;
+    llvm::APInt StorageRequired;
+  };
+}
+
+// Analyses the data arguments to sprintf to warn the user of memory overflows.
+// Mainly handles integer and string literals with their specifiers and flags.
+bool
+CheckSprintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier
+                                           &FS, const char *startSpecifier,
+                                           unsigned specifierLen) {
+  // Call the parent call first to do perform the validation on the specifier.
+  if (!CheckPrintfHandler::HandlePrintfSpecifier(FS, startSpecifier,
+                                                 specifierLen))
+    return false;
+
+  using namespace analyze_format_string;
+  using namespace analyze_printf;
+
+  const PrintfConversionSpecifier &CS = FS.getConversionSpecifier();
+  ConversionSpecifier::Kind CSKind = CS.getKind();
+  OptionalAmount Precision = FS.getPrecision();
+  OptionalAmount Width = FS.getFieldWidth();
+
+  // Offset the argument index to the data items to be printed.
+  unsigned Idx = 4 + FS.getArgIndex();
+  const Expr* Data = Args[Idx]->IgnoreImplicit();
+
+  // An extra will be required for +/-
+  if (FS.hasPlusPrefix() || FS.hasSpacePrefix())
+    ++StorageRequired;
+
+  if (isa<StringLiteral>(Data)) {
+    auto SL = cast<StringLiteral>(Data);
+    StorageRequired += llvm::APInt(64, SL->getByteLength());
+  }
+  else if (isa<IntegerLiteral>(Data)) {
+    // Default to decimal representation.
+    unsigned Radix = 10;
+
+    // Unsigned hexadecimal integers and pointer addresses.
+    if (CSKind == ConversionSpecifier::xArg ||
+        CSKind == ConversionSpecifier::XArg ||
+        CSKind == ConversionSpecifier::pArg)
+      Radix = 16;
+
+    // Unsigned octal.
+    else if (CSKind == ConversionSpecifier::oArg ||
+             CSKind == ConversionSpecifier::OArg)
+      Radix = 8;
+
+    // Create a real string to use its size.
+    std::string DataStr =
+      cast<IntegerLiteral>(Data)->getValue().toString(Radix, true);
+    StorageRequired += llvm::APInt(64, DataStr.length());
+  }
+  else if (isa<FloatingLiteral>(Data)) {
+    llvm::APFloat Value = cast<FloatingLiteral>(Data)->getValue();
+
+    // APFloat has a member function for printing the value to a hex
+    // representation.
+    if (CSKind == ConversionSpecifier::aArg ||
+        CSKind == ConversionSpecifier::AArg) {
+      char TempBuf[32];
+      unsigned HexDigits = 0;
+
+      if (Precision.getHowSpecified() == OptionalAmount::Constant)
+        HexDigits = Precision.getConstantAmount();
+
+      // Get the number of digits used to print the value.
+      unsigned NumDigits =
+        Value.convertToHexString(TempBuf, HexDigits, false,
+                                 llvm::APFloat::rmNearestTiesToEven);
+      StorageRequired += llvm::APInt(64, NumDigits);
+    }
+  }
+  else if (auto DataBuf = GetArrayType(S, Data)) {
+    llvm::APInt BufSize = DataBuf->getSize();
+    StorageRequired += BufSize;
+  }
+
+  if (FS.hasAlternativeForm()) {
+    // 0x will be prepended for oct and hex values.
+    if (CSKind == ConversionSpecifier::xArg ||
+        CSKind == ConversionSpecifier::XArg ||
+        CSKind == ConversionSpecifier::oArg ||
+        CSKind == ConversionSpecifier::OArg)
+      StorageRequired += llvm::APInt(64, 2);
+  }
+
+  // Pointer address have 'p' prepended
+  if (CSKind == ConversionSpecifier::pArg)
+    ++StorageRequired;
+
+  // The calculated sizes could be smaller than sizes specified by the user
+  // in their format string.
+  if (CS.isAnyIntArg()) {
+    // For integers, the precision modifier specifies how many digits to be
+    // written so this may overwrite the calculated size.
+    if (Precision.getHowSpecified() == OptionalAmount::Constant) {
+      if (StorageRequired.ule(Precision.getConstantAmount()))
+        StorageRequired = llvm::APInt(64, Precision.getConstantAmount());
+    }
+  }
+
+  // Width specifies the minimum number of characters to be printed.
+  if (Width.getHowSpecified() == OptionalAmount::Constant) {
+    if (StorageRequired.ule(Width.getConstantAmount()))
+      StorageRequired = llvm::APInt(64, Width.getConstantAmount());
+  }
+
+  // Compare less or equal to account for the null terminator that will
+  // be appended.
+  if (StorageBufSize.ule(StorageRequired)) {
+    //S.Diag(Args[0]->getLocStart(), diag::warn_sprintf_chk_overflow)
+    S.Diag(Args[0]->getLocStart(), diag::warn_memcpy_chk_overflow)
+      << getSpecifierRange(startSpecifier, specifierLen)
+      << Data->getSourceRange()
+      << Args[Idx]->getType();
+    return false;
+  }
+
+  return true;
+}
+
 //===--- CHECK: Scanf format string checking ------------------------------===//
 
 namespace {  
@@ -4295,6 +4503,22 @@
                                                  getLangOpts(),
                                                  Context.getTargetInfo()))
       H.DoneProcessing();
+  }
+  else if (Type == FST_Sprintf) {
+    // Sprintf has the same parsing and checking as printf, but we can also
+    // perform memory overflow checks.
+    CheckSprintfHandler H(*this, FExpr, OrigFormatExpr, firstDataArg,
+                          numDataArgs,
+                          (Type == FST_NSString || Type == FST_OSTrace),
+                          Str, HasVAListArg, Args, format_idx,
+                          inFunctionCall, CallType, CheckedVarArgs);
+    if (H.Init()) {
+      if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen,
+                                                    getLangOpts(),
+                                                    Context.getTargetInfo(),
+                                                    Type == FST_FreeBSDKPrintf))
+        H.DoneProcessing();
+    }
   } // TODO: handle other formats
 }
 
Index: test/SemaCXX/sprintf-chk.cpp
===================================================================
--- /dev/null
+++ test/SemaCXX/sprintf-chk.cpp
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -fsyntax-only %s
+void test() {
+
+  char bufA[4];
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%d", 100);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%i", 100);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%u", 100);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%x", 100);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%X", 100);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%o", 100);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%s", "foo");
+
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%d%d", 10, 10);   // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%i%i", 10, 10);   // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%u%u", 10, 10);   // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%x%x", 100, 100); // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%X%X", 100, 100); // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%o%o", 100, 100); // expected-warning {{will always overflow destination buffer}}
+
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%d", 1000);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%i", 1000);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%u", 1000);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%x", 10000);    // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%X", 10000);    // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%o", 1000);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%a", 10.0);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%A", 10.0);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%s", "foobar"); // expected-warning {{will always overflow destination buffer}}
+
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%+d", 100);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%+d", 10);
+
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%3d", 100);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%3i", 100);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%3u", 100);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%3x", 100);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%3X", 100);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%3o", 100);
+
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%4d", 100);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%4i", 100);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%4u", 100);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%4x", 100);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%4X", 100);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%4o", 100);     // expected-warning {{will always overflow destination buffer}}
+
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%#x", 15);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%#X", 15);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%#o", 7);
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%#x", 100);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%#X", 100);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufA, 0, __builtin_object_size(bufA, 0), "%#o", 100);     // expected-warning {{will always overflow destination buffer}}
+
+
+  char bufB[7];
+  __builtin___sprintf_chk(bufB, 0, __builtin_object_size(bufB, 0), "%a", 0.5);
+  __builtin___sprintf_chk(bufB, 0, __builtin_object_size(bufB, 0), "%A", 0.5);
+  __builtin___sprintf_chk(bufB, 0, __builtin_object_size(bufB, 0), "%a", 10.0);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufB, 0, __builtin_object_size(bufB, 0), "%A", 10.0);     // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufB, 0, __builtin_object_size(bufB, 0), "%1.1a", 0.5);
+  __builtin___sprintf_chk(bufB, 0, __builtin_object_size(bufB, 0), "%1.1A", 0.5);
+  __builtin___sprintf_chk(bufB, 0, __builtin_object_size(bufB, 0), "%1.2a", 0.5);   // expected-warning {{will always overflow destination buffer}}
+  __builtin___sprintf_chk(bufB, 0, __builtin_object_size(bufB, 0), "%1.2A", 0.5);   // expected-warning {{will always overflow destination buffer}}
+
+  char bufC[5];
+  __builtin___sprintf_chk(bufC, 0, __builtin_object_size(bufC, 0), "%s%s",          // expected-warning {{will always overflow destination buffer}}
+                          "foo", "bar");
+  const char bar[] = "foobar";
+  __builtin___sprintf_chk(bufC, 0, __builtin_object_size(bufC, 0), "%s", bar);      // expected-warning {{will always overflow destination buffer}}
+}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to