meikeb created this revision.
meikeb added a reviewer: rsmith.
meikeb added a subscriber: cfe-commits.

The warning for a format string not being a sting literal and therefore
being potentially insecure is overly strict for indecies into sting
literals. This fix checks if the index into the string literal is
precomputable. If thats the case it will check if the suffix of that
sting literal is a valid format string string literal. It will still
issue the aforementioned warning for out of range indecies into the
string literal.

https://reviews.llvm.org/D23820

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===================================================================
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,34 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2); // no-warning
+  printf(s2 + 2); // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2);            // no-warning
+  printf(6 + s2 - 2);        // no-warning
+  printf(2 + (b ? s1 : s2)); // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");     // no-warning
+  printf(2 + (b ? s1 : s2 - 2), ""); // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);         // no-warning
+  printf(0 ? s2 : s2 + 2);         // no-warning
+  printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}}
+}
Index: lib/Sema/SemaChecking.cpp
===================================================================
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3860,7 +3860,8 @@
                       unsigned firstDataArg, Sema::FormatStringType Type,
                       Sema::VariadicCallType CallType, bool InFunctionCall,
                       llvm::SmallBitVector &CheckedVarArgs,
-                      UncoveredArgHandler &UncoveredArg) {
+                      UncoveredArgHandler &UncoveredArg,
+                      int64_t &Offset) {
  tryAgain:
   if (E->isTypeDependent() || E->isValueDependent())
     return SLCT_NotALiteral;
@@ -3895,23 +3896,31 @@
         CheckLeft = false;
     }
 
+    // We need to maintain the offsets for the right and the left hand side
+    // seperately to check if every possible indexed expression is a valid
+    // string literal. They might have different offsets for different string
+    // literals in the end.
+    int64_t LOffset = Offset;
     StringLiteralCheckType Left;
     if (!CheckLeft)
       Left = SLCT_UncheckedLiteral;
     else {
       Left = checkFormatStringExpr(S, C->getTrueExpr(), Args,
                                    HasVAListArg, format_idx, firstDataArg,
                                    Type, CallType, InFunctionCall,
-                                   CheckedVarArgs, UncoveredArg);
-      if (Left == SLCT_NotALiteral || !CheckRight)
+                                   CheckedVarArgs, UncoveredArg, LOffset);
+      if (Left == SLCT_NotALiteral || !CheckRight) {
+        Offset = LOffset;
         return Left;
+      }
     }
 
+    int64_t ROffset = Offset;
     StringLiteralCheckType Right =
         checkFormatStringExpr(S, C->getFalseExpr(), Args,
                               HasVAListArg, format_idx, firstDataArg,
                               Type, CallType, InFunctionCall, CheckedVarArgs,
-                              UncoveredArg);
+                              UncoveredArg, ROffset);
 
     return (CheckLeft && Left < Right) ? Left : Right;
   }
@@ -3964,8 +3973,8 @@
           return checkFormatStringExpr(S, Init, Args,
                                        HasVAListArg, format_idx,
                                        firstDataArg, Type, CallType,
-                                       /*InFunctionCall*/false, CheckedVarArgs,
-                                       UncoveredArg);
+                                       /*InFunctionCall*/ false, CheckedVarArgs,
+                                       UncoveredArg, Offset);
         }
       }
 
@@ -4020,7 +4029,7 @@
         return checkFormatStringExpr(S, Arg, Args,
                                      HasVAListArg, format_idx, firstDataArg,
                                      Type, CallType, InFunctionCall,
-                                     CheckedVarArgs, UncoveredArg);
+                                     CheckedVarArgs, UncoveredArg, Offset);
       } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
         unsigned BuiltinID = FD->getBuiltinID();
         if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString ||
@@ -4030,7 +4039,7 @@
                                        HasVAListArg, format_idx,
                                        firstDataArg, Type, CallType,
                                        InFunctionCall, CheckedVarArgs,
-                                       UncoveredArg);
+                                       UncoveredArg, Offset);
         }
       }
     }
@@ -4047,14 +4056,64 @@
       StrE = cast<StringLiteral>(E);
 
     if (StrE) {
+      if (Offset > 0) {
+        // We create a new string literal based on the computed fixed offset
+        // into the original string literal.
+        StrE = StringLiteral::Create(S.Context,
+                                     StrE->getString().drop_front(Offset),
+                                     StrE->getKind(),
+                                     StrE->isPascal(),
+                                     StrE->getType(),
+                                     StrE->getLocStart());
+      } else if (Offset < 0) {
+        // TODO: It would be better to have an explicit warning for out of
+        // bounds literals.
+        return SLCT_NotALiteral;
+      }
       CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx,
                         firstDataArg, Type, InFunctionCall, CallType,
                         CheckedVarArgs, UncoveredArg);
       return SLCT_CheckedLiteral;
     }
 
     return SLCT_NotALiteral;
   }
+  case Stmt::BinaryOperatorClass: {
+    llvm::APSInt LResult;
+    llvm::APSInt RResult;
+
+    const BinaryOperator *BinOp = cast<BinaryOperator>(E);
+
+    // A string literal + an int offset is still a string literal.
+    if (BinOp->isAdditiveOp()) {
+      bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context);
+      bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context);
+
+      if (LIsInt != RIsInt) {
+        BinaryOperatorKind BinOpKind = BinOp->getOpcode();
+
+        if (BinOpKind == BO_Add) {
+          if (LIsInt) {
+            Offset += LResult.getExtValue();
+            E = BinOp->getRHS();
+          } else {
+            Offset += RResult.getExtValue();
+            E = BinOp->getLHS();
+          }
+          goto tryAgain;
+        } else if (BinOpKind == BO_Sub) {
+          // We can only subtract from a pointer if we expect a literal.
+          if (RIsInt) {
+            Offset -= RResult.getExtValue();
+            E = BinOp->getLHS();
+            goto tryAgain;
+          }
+        }
+      }
+
+      return SLCT_NotALiteral;
+    }
+  }
 
   default:
     return SLCT_NotALiteral;
@@ -4118,11 +4177,12 @@
   // ObjC string uses the same format specifiers as C string, so we can use
   // the same format string checking logic for both ObjC and C strings.
   UncoveredArgHandler UncoveredArg;
+  int64_t Offset = 0;
   StringLiteralCheckType CT =
       checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg,
                             format_idx, firstDataArg, Type, CallType,
-                            /*IsFunctionCall*/true, CheckedVarArgs,
-                            UncoveredArg);
+                            /*IsFunctionCall*/ true, CheckedVarArgs,
+                            UncoveredArg, Offset);
 
   // Generate a diagnostic where an uncovered argument is detected.
   if (UncoveredArg.hasUncoveredArg()) {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to