Currently, when the format string is a non-literal, the only warning emitted 
and checked for is the non-literal format string.  This patch checks the 
arguments passed to printf/scanf functions and checks that they are valid for 
the format string.  For instance, warning on user defined types or non-pointers 
to scanf functions.  Additionally, if only one argument is present, warn if it 
is a va_list.  For built-in functions, suggest the proper va_list function to 
fix it.

http://reviews.llvm.org/D8757

Files:
  include/clang/Basic/DiagnosticSemaKinds.td
  include/clang/Sema/Sema.h
  lib/Sema/SemaChecking.cpp
  test/SemaCXX/format-strings-invalid-args.cpp

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -5104,6 +5104,13 @@
 def warn_format_nonliteral : Warning<
   "format string is not a string literal">,
   InGroup<FormatNonLiteral>, DefaultIgnore;
+def warn_va_list_format_string_argument : Warning<
+  "passing a va_list to a variadic function">, InGroup<Format>;
+def note_va_list_function : Note<
+  "did you mean to use function %0 which takes a va_list argument?">;
+def warn_invalid_format_argument : Warning<
+   "invalid argument of type %0 to %select{printf|scanf}1 format string">,
+   InGroup<Format>;
 
 def err_unexpected_interface : Error<
   "unexpected interface name %0: expected expression">;
Index: include/clang/Sema/Sema.h
===================================================================
--- include/clang/Sema/Sema.h
+++ include/clang/Sema/Sema.h
@@ -8518,13 +8518,15 @@
                             bool IsCXXMember,
                             VariadicCallType CallType,
                             SourceLocation Loc, SourceRange Range,
-                            llvm::SmallBitVector &CheckedVarArgs);
+                            llvm::SmallBitVector &CheckedVarArgs,
+                            FunctionDecl *FD);
   bool CheckFormatArguments(ArrayRef<const Expr *> Args,
                             bool HasVAListArg, unsigned format_idx,
                             unsigned firstDataArg, FormatStringType Type,
                             VariadicCallType CallType,
                             SourceLocation Loc, SourceRange range,
-                            llvm::SmallBitVector &CheckedVarArgs);
+                            llvm::SmallBitVector &CheckedVarArgs,
+                            FunctionDecl *FD);
 
   void CheckAbsoluteValueFunction(const CallExpr *Call,
                                   const FunctionDecl *FDecl,
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -1149,7 +1149,7 @@
       CheckedVarArgs.resize(Args.size());
 
       CheckFormatArguments(I, Args, IsMemberFunction, CallType, Loc, Range,
-                           CheckedVarArgs);
+                           CheckedVarArgs, dyn_cast<FunctionDecl>(FDecl));
     }
   }
 
@@ -2747,21 +2747,114 @@
                                 bool IsCXXMember,
                                 VariadicCallType CallType,
                                 SourceLocation Loc, SourceRange Range,
-                                llvm::SmallBitVector &CheckedVarArgs) {
+                                llvm::SmallBitVector &CheckedVarArgs,
+                                FunctionDecl *FD) {
   FormatStringInfo FSI;
   if (getFormatStringInfo(Format, IsCXXMember, &FSI))
     return CheckFormatArguments(Args, FSI.HasVAListArg, FSI.FormatIdx,
                                 FSI.FirstDataArg, GetFormatStringType(Format),
-                                CallType, Loc, Range, CheckedVarArgs);
+                                CallType, Loc, Range, CheckedVarArgs, FD);
+  return false;
+}
+
+/// Given a builtin vararg function, return the equivalent va_list function, or
+/// 0 otherwise.
+static unsigned GetVAListFunction(unsigned BuiltinID) {
+  switch (BuiltinID) {
+    default:
+      return 0;
+    case Builtin::BIprintf:
+      return Builtin::BIvprintf;
+    case Builtin::BIfprintf:
+      return Builtin::BIvfprintf;
+    case Builtin::BIsnprintf:
+      return Builtin::BIvsnprintf;
+    case Builtin::BIsprintf:
+      return Builtin::BIvsprintf;
+    case Builtin::BIscanf:
+      return Builtin::BIvscanf;
+    case Builtin::BIfscanf:
+      return Builtin::BIvfscanf;
+    case Builtin::BIsscanf:
+      return Builtin::BIvsscanf;
+    case Builtin::BINSLog:
+      return Builtin::BINSLogv;
+  }
+}
+
+/// Emit a diagnostic with a fix-it hint to replace the range with builtin
+/// function, qualifying it with "::" if necessary.
+static void EmitBuiltinFunctionHints(Sema &SemaRef, unsigned BuiltinID,
+                                     SourceRange Range, unsigned DiagID) {
+  std::string ReplacementName = SemaRef.Context.BuiltinInfo.GetName(BuiltinID);
+  // Look up builtin function in TU scope.
+  DeclarationName Name(&SemaRef.Context.Idents.get(ReplacementName));
+  LookupResult Result(SemaRef, Name, Range.getBegin(), Sema::LookupAnyName);
+  SemaRef.LookupName(Result, SemaRef.TUScope);
+
+  // Skip notes if multiple results found in lookup.
+  if (!Result.empty() && !Result.isSingleResult())
+    return;
+
+  FunctionDecl *FD = 0;
+  bool FoundFunction = Result.isSingleResult();
+  // When one result is found, see if it is the correct function.
+  if (Result.isSingleResult()) {
+    FD = dyn_cast<FunctionDecl>(Result.getFoundDecl());
+    if (!FD || FD->getBuiltinID() != BuiltinID)
+      return;
+  }
+
+  // Look for local name conflict, prepend "::" as necessary.
+  Result.clear();
+  SemaRef.LookupName(Result, SemaRef.getCurScope());
+
+  if (!FoundFunction) {
+    if (!Result.empty()) {
+      ReplacementName = "::" + ReplacementName;
+    }
+  } else { // FoundFunction
+    if (Result.isSingleResult()) {
+      if (Result.getFoundDecl() != FD) {
+        ReplacementName = "::" + ReplacementName;
+      }
+    } else if (!Result.empty()) {
+      ReplacementName = "::" + ReplacementName;
+    }
+  }
+
+  SemaRef.Diag(Range.getBegin(), DiagID)
+      << ReplacementName
+      << FixItHint::CreateReplacement(Range, ReplacementName);
+
+  if (!FoundFunction) {
+    SemaRef.Diag(Range.getBegin(), diag::note_include_header_or_declare)
+        << SemaRef.Context.BuiltinInfo.getHeaderName(BuiltinID)
+        << SemaRef.Context.BuiltinInfo.GetName(BuiltinID);
+  }
+}
+
+/// Check to see if Ty is a va_list.  Since a va_list can be an array, also
+/// check if Ty is a pointer to an array element, which can happen if Ty
+/// is from a parameter.
+static bool IsVaListType(ASTContext &Context, QualType Ty) {
+  QualType VaListType = Context.getBuiltinVaListType();
+  if (Context.hasSameType(Ty, VaListType))
+    return true;
+  if (VaListType->isArrayType() && Ty->isPointerType())
+    return Context.hasSameType(
+        VaListType->getAsArrayTypeUnsafe()->getElementType(),
+        Ty->getPointeeType());
   return false;
 }
 
 bool Sema::CheckFormatArguments(ArrayRef<const Expr *> Args,
                                 bool HasVAListArg, unsigned format_idx,
                                 unsigned firstDataArg, FormatStringType Type,
                                 VariadicCallType CallType,
                                 SourceLocation Loc, SourceRange Range,
-                                llvm::SmallBitVector &CheckedVarArgs) {
+                                llvm::SmallBitVector &CheckedVarArgs,
+                                FunctionDecl *FD) {
   // CHECK: printf/scanf-like function is called with no format string.
   if (format_idx >= Args.size()) {
     Diag(Loc, diag::warn_missing_format_string) << Range;
@@ -2805,14 +2898,77 @@
 
   // If there are no arguments specified, warn with -Wformat-security, otherwise
   // warn only with -Wformat-nonliteral.
-  if (Args.size() == firstDataArg)
+  if (Args.size() == firstDataArg) {
     Diag(Args[format_idx]->getLocStart(),
          diag::warn_format_nonliteral_noargs)
       << OrigFormatExpr->getSourceRange();
-  else
-    Diag(Args[format_idx]->getLocStart(),
-         diag::warn_format_nonliteral)
-           << OrigFormatExpr->getSourceRange();
+    return false;
+  }
+
+  Diag(Args[format_idx]->getLocStart(), diag::warn_format_nonliteral)
+      << OrigFormatExpr->getSourceRange();
+
+  // Even with a non-literal format string, some checking on arguments is
+  // possible.
+  if (firstDataArg == 0)
+    return false;
+
+  if (firstDataArg + 1 == Args.size()) {
+    const Expr *Arg = Args[firstDataArg]->IgnoreImpCasts();
+    // Warn if there is exactly one variadic argument and that argument is a
+    // va_list type.  If possible, suggest the the va_list equivalent function.
+    if (IsVaListType(Context, Arg->getType())) {
+      Diag(Arg->getLocStart(), diag::warn_va_list_format_string_argument);
+
+      if (!FD)
+        return false;
+      const unsigned BuiltinID = GetVAListFunction(FD->getBuiltinID());
+      if (BuiltinID == 0)
+        return false;
+
+      EmitBuiltinFunctionHints(*this, BuiltinID, Range,
+                               diag::note_va_list_function);
+      return false;
+    }
+  }
+
+  if (Type == FST_Printf) {
+    for (unsigned i = firstDataArg, e = Args.size(); i < e; ++i) {
+      const Expr *Arg = Args[i];
+      QualType T = Arg->getType();
+      // Non-POD arguments are checked by -Wvarargs
+      if (!T.isPODType(Context))
+        continue;
+      if (T->isIntegerType() || T->isFloatingType())
+        continue;
+      if (T->isPointerType())
+        continue;
+
+      Diag(Arg->getLocStart(), diag::warn_invalid_format_argument)
+          << Arg->getType() << Arg->getSourceRange() << 0 /*printf*/;
+    }
+    return false;
+  }
+
+  if (Type == FST_Scanf) {
+    for (unsigned i = firstDataArg, e = Args.size(); i < e; ++i) {
+      const Expr *Arg = Args[i];
+      QualType T = Arg->getType();
+      // Non-POD arguments are checked by -Wvarargs
+      if (!T.isPODType(Context))
+        continue;
+      if (T->isPointerType()) {
+        QualType Pointee = T->getPointeeType();
+        if (!Pointee.isConstQualified())
+          if (Pointee->isIntegerType() || Pointee->isFloatingType())
+            continue;
+      }
+      Diag(Arg->getLocStart(), diag::warn_invalid_format_argument)
+          << Arg->getType() << Arg->getSourceRange() << 1 /*scanf*/;
+    }
+    return false;
+  }
+
   return false;
 }
 
Index: test/SemaCXX/format-strings-invalid-args.cpp
===================================================================
--- test/SemaCXX/format-strings-invalid-args.cpp
+++ test/SemaCXX/format-strings-invalid-args.cpp
@@ -0,0 +1,106 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s -Wformat
+typedef __SIZE_TYPE__ size_t;
+typedef __builtin_va_list va_list;
+
+typedef struct _FILE FILE;
+
+extern "C" {
+
+int printf(const char *, ...);
+int snprintf(char *, size_t, const char *, ...);
+int sprintf(char *, const char *, ...);
+int asprintf(char **, const char *, ...);
+
+int vprintf(const char *, va_list);
+int vsnprintf(char *, size_t, const char *, va_list);
+int vsprintf(char *, const char *, va_list);
+int vasprintf(char **, const char *, va_list);
+
+
+int fprintf(FILE *, const char *, ...);
+
+int scanf(const char*, ...);
+int fscanf(FILE*, const char*, ...);
+int sscanf(const char*,  const char*, ...);
+
+}
+
+void test_builtins(const char *str, va_list vlist, FILE *file, char *out) {
+  printf(str, vlist);
+  // expected-warning@-1{{passing a va_list to a variadic function}}
+  // expected-note@-2{{did you mean to use function vprintf which takes a va_list argument?}}
+  snprintf(out, 0, str, vlist);
+  // expected-warning@-1{{passing a va_list to a variadic function}}
+  // expected-note@-2{{did you mean to use function vsnprintf which takes a va_list argument?}}
+  sprintf(out, str, vlist);
+  // expected-warning@-1{{passing a va_list to a variadic function}}
+  // expected-note@-2{{did you mean to use function vsprintf which takes a va_list argument?}}
+  asprintf(&out, str, vlist);
+  // expected-warning@-1{{passing a va_list to a variadic function}}
+
+  fprintf(file, str, vlist);
+  // expected-warning@-1{{passing a va_list to a variadic function}}
+  // expected-note@-2{{did you mean to use function vfprintf which takes a va_list argument?}}
+  // expected-note@-3{{include the header <stdio.h> or explicitly provide a declaration for 'vfprintf'}}
+  scanf(str, vlist);
+  // expected-warning@-1{{passing a va_list to a variadic function}}
+  // expected-note@-2{{did you mean to use function vscanf which takes a va_list argument?}}
+  // expected-note@-3{{include the header <stdio.h> or explicitly provide a declaration for 'vscanf'}}
+  fscanf(file, str, vlist);
+  // expected-warning@-1{{passing a va_list to a variadic function}}
+  // expected-note@-2{{did you mean to use function vfscanf which takes a va_list argument?}}
+  // expected-note@-3{{include the header <stdio.h> or explicitly provide a declaration for 'vfscanf'}}
+  sscanf(out, str, vlist);
+  // expected-warning@-1{{passing a va_list to a variadic function}}
+  // expected-note@-2{{did you mean to use function vsscanf which takes a va_list argument?}}
+  // expected-note@-3{{include the header <stdio.h> or explicitly provide a declaration for 'vsscanf'}}
+}
+
+
+void print(const char*, ...) __attribute__((__format__(__printf__, 1, 2)));
+void scan(const char*, ...) __attribute__((__format__(__scanf__, 1, 2)));
+
+void test(const char* str) {
+  va_list list;
+  print(str, list);
+  // expected-warning@-1{{passing a va_list to a variadic function}}
+  scan(str, list);
+  // expected-warning@-1{{passing a va_list to a variadic function}}
+
+  int i;
+  long l;
+  long long ll;
+  unsigned int ui;
+  unsigned long ul;
+  unsigned long long ull;
+  const char * cc;
+  char * c;
+  char ac[5];
+  const char acc[5] = "Hi";
+  enum E { E1 } e;
+  struct X { } x;
+
+  print(str, i, l, ll, ui, ul, ull, cc, c, ac, acc, e, E1);
+  print(str, &i, &l, &ui, &ul, &ull, &cc, &c, &e);
+
+  print(str, x);
+  // expected-warning@-1{{invalid argument of type 'struct X' to printf format string}}
+
+  scan(str, &i, &l, &ui, &ul, &ull, &e);
+  scan(str, c, ac);
+
+  scan(str, i, l, ll, ui, ul, ull, e);
+  // expected-warning@-1{{invalid argument of type 'int' to scanf format string}}
+  // expected-warning@-2{{invalid argument of type 'long' to scanf format string}}
+  // expected-warning@-3{{invalid argument of type 'long long' to scanf format string}}
+  // expected-warning@-4{{invalid argument of type 'unsigned int' to scanf format string}}
+  // expected-warning@-5{{invalid argument of type 'unsigned long' to scanf format string}}
+  // expected-warning@-6{{invalid argument of type 'unsigned long long' to scanf format string}}
+  // expected-warning@-7{{invalid argument of type 'int' to scanf format string}}
+  scanf(str, cc, acc);
+  // expected-warning@-1{{invalid argument of type 'const char *' to scanf format string}}
+  // expected-warning@-2{{invalid argument of type 'const char *' to scanf format string}}
+  scanf(str, x, &x);
+  // expected-warning@-1{{invalid argument of type 'struct X' to scanf format string}}
+  // expected-warning@-2{{invalid argument of type 'struct X *' to scanf format string}}
+}
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to