https://github.com/flovent created https://github.com/llvm/llvm-project/pull/146212
Bounded string functions takes smallest of two values as it's copy size (`amountCopied` variable in `evalStrcpyCommon`), and it's used to decided whether this operation will cause out-of-bound access and invalidate it's super region if it does. for `strlcat`: `amountCopied = min (size - dstLen - 1 , srcLen)` for others: `amountCopied = min (srcLen, size)` Currently when one of two values is unknown or `SValBuilder` can't decide which one is smaller, `amountCopied` will remain `UnknownVal`, which will invalidate copy destination's super region unconditionally. This patch add check to see if one of these two values is definitely in-bound, if so `amountCopied` has to be in-bound too, because it‘s less than or equal to them, we can avoid the invalidation of super region and some related false positives in this situation. Closes #143807. >From 9da53c788fc01cd3fc2dd4c178b836035b5d380b Mon Sep 17 00:00:00 2001 From: flovent <flb...@protonmail.com> Date: Sat, 28 Jun 2025 20:58:43 +0800 Subject: [PATCH] [analyzer] Avoid unnecessary super region invalidation in `CStringChecker` --- .../Checkers/CStringChecker.cpp | 77 ++++++++++++- .../cstring-should-not-invalidate.cpp | 107 ++++++++++++++++++ 2 files changed, 178 insertions(+), 6 deletions(-) create mode 100644 clang/test/Analysis/cstring-should-not-invalidate.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 4d12fdcec1f1a..433fd2ce5f292 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -272,7 +272,8 @@ class CStringChecker : public Checker< eval::Call, static ProgramStateRef invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S, const Expr *BufE, ConstCFGElementRef Elem, - SVal BufV, SVal SizeV, QualType SizeTy); + SVal BufV, SVal SizeV, QualType SizeTy, + bool CouldAccessOutOfBound = true); /// Operation never overflows, do not invalidate the super region. static ProgramStateRef invalidateDestinationBufferNeverOverflows( @@ -1211,14 +1212,17 @@ bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State, ProgramStateRef CStringChecker::invalidateDestinationBufferBySize( CheckerContext &C, ProgramStateRef S, const Expr *BufE, - ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy) { + ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy, + bool CouldAccessOutOfBound) { auto InvalidationTraitOperations = - [&C, S, BufTy = BufE->getType(), BufV, SizeV, - SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { + [&C, S, BufTy = BufE->getType(), BufV, SizeV, SizeTy, + CouldAccessOutOfBound](RegionAndSymbolInvalidationTraits &ITraits, + const MemRegion *R) { // If destination buffer is a field region and access is in bound, do // not invalidate its super region. if (MemRegion::FieldRegionKind == R->getKind() && - isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) { + (!CouldAccessOutOfBound || + isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy))) { ITraits.setTrait( R, RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); @@ -2223,6 +2227,67 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, Result = lastElement; } + // For bounded method, amountCopied take the minimum of two values, + // for ConcatFnKind::strlcat: + // amountCopied = min (size - dstLen - 1 , srcLen) + // for others: + // amountCopied = min (srcLen, size) + // So even if we don't know about amountCopied, as long as one of them will + // not cause an out-of-bound access, the whole function's operation will not + // too, that will avoid invalidating the superRegion of data member in that + // situation. + bool CouldAccessOutOfBound = true; + if (IsBounded && amountCopied.isUnknown()) { + // Get the max number of characters to copy. + SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}}; + SVal lenVal = state->getSVal(lenExpr.Expression, LCtx); + + // Protect against misdeclared strncpy(). + lenVal = + svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType()); + + std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>(); + + auto CouldAccessOutOfBoundForSVal = [&](NonLoc Val) -> bool { + return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression), + Dst.Expression->getType(), Val, + C.getASTContext().getSizeType()); + }; + + if (strLengthNL) { + CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*strLengthNL); + } + + if (CouldAccessOutOfBound && lenValNL) { + switch (appendK) { + case ConcatFnKind::none: + case ConcatFnKind::strcat: { + CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*lenValNL); + break; + } + case ConcatFnKind::strlcat: { + if (!dstStrLengthNL) + break; + + SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, + *dstStrLengthNL, sizeTy); + if (!isa<NonLoc>(freeSpace)) + break; + + freeSpace = + svalBuilder.evalBinOp(state, BO_Sub, freeSpace, + svalBuilder.makeIntVal(1, sizeTy), sizeTy); + std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>(); + if (!freeSpaceNL) + break; + + CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(*freeSpaceNL); + break; + } + } + } + } + // Invalidate the destination (regular invalidation without pointer-escaping // the address of the top-level region). This must happen before we set the // C string length because invalidation will clear the length. @@ -2232,7 +2297,7 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call, // string, but that's still an improvement over blank invalidation. state = invalidateDestinationBufferBySize( C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal, - amountCopied, C.getASTContext().getSizeType()); + amountCopied, C.getASTContext().getSizeType(), CouldAccessOutOfBound); // Invalidate the source (const-invalidation without const-pointer-escaping // the address of the top-level region). diff --git a/clang/test/Analysis/cstring-should-not-invalidate.cpp b/clang/test/Analysis/cstring-should-not-invalidate.cpp new file mode 100644 index 0000000000000..14c92447c52ca --- /dev/null +++ b/clang/test/Analysis/cstring-should-not-invalidate.cpp @@ -0,0 +1,107 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection +// -analyzer-config c++-inlining=constructors -verify %s + +// expected-no-diagnostics + +typedef unsigned int size_t; + +char *strncpy(char *dest, const char *src, size_t x); + +// issue 143807 +struct strncpyTestClass { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strncpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strncpy(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strncpyTest(char *src, size_t n) { + strncpyTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); +} + +size_t strlcpy(char *dest, const char *src, size_t size); + +struct strlcpyTestClass { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strlcpy(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strlcpy(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strlcpyTest(char *src, size_t n) { + strlcpyTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); +} + +char *strncat(char *s1, const char *s2, size_t n); + +struct strncatTestClass { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strncat(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strncat(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strncatTest(char *src, size_t n) { + strncatTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); +} + +size_t strlcat(char *dst, const char *src, size_t size); + +struct strlcatTestClass { + int *m_ptr; + char m_buff[1000]; + + void KnownLen(char *src) { + m_ptr = new int; + strlcat(m_buff, src, sizeof(m_buff)); // known len but unknown src size + delete m_ptr; // no warning + } + + void KnownSrcLen(size_t n) { + m_ptr = new int; + strlcat(m_buff, "xyz", n); // known src size but unknown len + delete m_ptr; // no warning + } +}; + +void strlcatTest(char *src, size_t n) { + strlcatTestClass rep; + rep.KnownLen(src); + rep.KnownSrcLen(n); +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits