pgousseau created this revision. pgousseau added reviewers: cfe-commits, ayartsev, xazax.hun.
Dear All, I would like to propose a patch to avoid the false positive memory leak warning kindly reported by krzysztof in https://llvm.org/bugs/show_bug.cgi?id=22954 The issue seems originates from the CString checker's handling of 'memcpy' (and string copy functions in general). Given the below code snippet: ---------------------- struct aa { char *s; char data[32];}; ... a.s = malloc(nbytes); memcpy(a.data, source, len); ... ---------------------- As the CString checker handles the memcpy call, it requests the invalidation of the 'a.data' region. But the invalidation worker marks the whole memory region of 'a' as to be invalidated. The Malloc checker is not made aware of this causing the false positive. Following advices from Anton Yartsev and Gabor Horvath on cfe-dev (http://lists.cs.uiuc.edu/pipermail/cfe-dev/2015-July/043786.html), this patch introduces a new trait 'TK_DoNotInvalidateSuperRegion', for the invalidation worker to take into account, when invalidating a destination buffer of type 'FieldRegion'. Please let me know if this is an acceptable change and if yes eventually commit it for me (as I do not have svn access) ? Regards, Pierre Gousseau SN Systems - Sony Computer Entertainment http://reviews.llvm.org/D11832 Files: include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h lib/StaticAnalyzer/Checkers/CStringChecker.cpp lib/StaticAnalyzer/Core/RegionStore.cpp test/Analysis/pr22954.c
Index: test/Analysis/pr22954.c =================================================================== --- /dev/null +++ test/Analysis/pr22954.c @@ -0,0 +1,266 @@ +// Given code 'struct aa { char s1[4]; char * s2;} a; memcpy(a.s1, ...);', +// this test checks that the CStringChecker only invalidates the destination buffer array a.s1 (instead of a.s1 and a.s2). +// At the moment the whole of the destination array content is invalidated. +// RUN: %clang_cc1 -analyze -analyzer-checker=core,unix.Malloc,debug.ExprInspection -analyzer-store=region -verify %s + +typedef __typeof(sizeof(int)) size_t; + +char *strdup(const char *s); +void free(void *); +void *memcpy(void *dst, const void *src, size_t n); + +void clang_analyzer_eval(int); + +struct aa { + char s1[4]; + char *s2; +}; + +// Test different types of structure initialisation. +int f0() { + struct aa a0 = {{1, 2, 3, 4}, 0}; + a0.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a0.s1, input, 4); + clang_analyzer_eval(a0.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a0.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a0.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a0.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a0.s2 == 0); // expected-warning{{UNKNOWN}} + free(a0.s2); // no warning + return 0; +} + +int f1() { + struct aa a1; + a1.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a1.s1, input, 4); + clang_analyzer_eval(a1.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a1.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a1.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a1.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a1.s2 == 0); // expected-warning{{UNKNOWN}} + free(a1.s2); // no warning + return 0; +} + +int f2() { + struct aa a2 = {{1, 2}}; + a2.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a2.s1, input, 4); + clang_analyzer_eval(a2.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a2.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a2.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a2.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a2.s2 == 0); // expected-warning{{UNKNOWN}} + free(a2.s2); // no warning + return 0; +} + +int f3() { + struct aa a3 = {{1, 2, 3, 4}, 0}; + a3.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + int * dest = (int*)a3.s1; + memcpy(dest, input, 4); + clang_analyzer_eval(a3.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a3.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a3.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a3.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a3.s2 == 0); // expected-warning{{UNKNOWN}} + free(a3.s2); // no warning + return 0; +} + +struct bb { + struct aa a; + char * s2; +}; + +int f4() { + struct bb b0 = {{1, 2, 3, 4}, 0}; + b0.s2 = strdup("hello"); + b0.a.s2 = strdup("hola"); + char input[] = {'a', 'b', 'c', 'd'}; + char * dest = (char*)(b0.a.s1); + memcpy(dest, input, 4); + clang_analyzer_eval(b0.a.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(b0.a.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(b0.a.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(b0.a.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(dest[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(b0.s2 == 0); // expected-warning{{UNKNOWN}} + free(b0.a.s2); // no warning + free(b0.s2); // no warning + return 0; +} + +int f5() { + struct aa a0 = {{1, 2, 3, 4}, 0}; + a0.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a0.s1, input, 4); + return 0; // expected-warning{{Potential leak of memory pointed to by 'a0.s2'}} +} + +int f6() { + struct aa a1; + a1.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a1.s1, input, 4); + return 0; // expected-warning{{Potential leak of memory pointed to by 'a1.s2'}} +} + +int f7() { + struct aa a2 = {{1, 2}}; + a2.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a2.s1, input, 4); + return 0; // expected-warning{{Potential leak of memory pointed to by 'a2.s2'}} +} + +int f8() { + struct aa a3 = {{1, 2, 3, 4}, 0}; + a3.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + int * dest = (int*)a3.s1; + memcpy(dest, input, 4); + return 0; // expected-warning{{Potential leak of memory pointed to by 'a3.s2'}} +} + +int f9() { + struct bb b0 = {{1, 2, 3, 4}, 0}; + b0.s2 = strdup("hello"); + b0.a.s2 = strdup("hola"); + char input[] = {'a', 'b', 'c', 'd'}; + char * dest = (char*)(b0.a.s1); + memcpy(dest, input, 4); + free(b0.a.s2); // expected-warning{{Potential leak of memory pointed to by 'b0.s2'}} + return 0; +} + +int f10() { + struct bb b0 = {{1, 2, 3, 4}, 0}; + b0.s2 = strdup("hello"); + b0.a.s2 = strdup("hola"); + char input[] = {'a', 'b', 'c', 'd'}; + char * dest = (char*)(b0.a.s1); + memcpy(dest, input, 4); + free(b0.s2); // expected-warning{{Potential leak of memory pointed to by 'b0.a.s2'}} + return 0; +} + +struct cc { + char * s1; + char * s2; +}; + +int f11() { + char x[4] = {1, 2}; + struct cc c0; + c0.s2 = strdup("hello"); + c0.s1 = &x[0]; + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(c0.s1, input, 4); + clang_analyzer_eval(c0.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(c0.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(c0.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(c0.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + free(c0.s2); // no-warning + return 0; +} + +struct dd { + char *s2; + char s1[4]; +}; + +int f12() { + struct dd d0 = {0, {1, 2, 3, 4}}; + d0.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(d0.s1, input, 4); + clang_analyzer_eval(d0.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(d0.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(d0.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(d0.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(d0.s2 == 0); // expected-warning{{UNKNOWN}} + free(d0.s2); // no warning + return 0; +} + +struct ee { + int a; + char b; +}; + +struct EE { + struct ee s1[2]; + char * s2; +}; + +int f13() { + struct EE E0 = {{{1, 2}, {3, 4}}, 0}; + E0.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(E0.s1, input, 4); + clang_analyzer_eval(E0.s1[0].a == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(E0.s1[0].b == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(E0.s1[1].a == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(E0.s1[1].b == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(E0.s2 == 0); // expected-warning{{UNKNOWN}} + free(E0.s2); // no warning + return 0; +} + +struct ff { + char * s1[2]; + char * s2; +}; + +int f14() { + struct ff f0 = {{0, 0}, 0}; + f0.s1[0] = strdup("hello"); + f0.s1[1] = strdup("hola"); + f0.s2 = strdup("world"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(f0.s1[0], input, 4); + memcpy(f0.s1[1], input, 4); + clang_analyzer_eval(f0.s1[0][0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[0][1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[0][2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[0][3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[1][0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[1][1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[1][2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s1[1][3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(f0.s2 == 0); // expected-warning{{UNKNOWN}} + free(f0.s2); + free(f0.s1[1]); + free(f0.s1[0]); // no warning + return 0; // no warning +} + +struct aa a15 = {{1, 2, 3, 4}, 0}; + +int f15() { + a15.s2 = strdup("hello"); + char input[] = {'a', 'b', 'c', 'd'}; + memcpy(a15.s1, input, 4); + clang_analyzer_eval(a15.s1[0] == 'a'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a15.s1[1] == 'b'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a15.s1[2] == 'c'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a15.s1[3] == 'd'); // expected-warning{{UNKNOWN}} + clang_analyzer_eval(a15.s2 == 0); // expected-warning{{UNKNOWN}} + free(a15.s2); // no warning + return 0; +} Index: lib/StaticAnalyzer/Core/RegionStore.cpp =================================================================== --- lib/StaticAnalyzer/Core/RegionStore.cpp +++ lib/StaticAnalyzer/Core/RegionStore.cpp @@ -345,7 +345,8 @@ /// regions. void populateWorkList(invalidateRegionsWorker &W, ArrayRef<SVal> Values, - InvalidatedRegions *TopLevelRegions); + InvalidatedRegions *TopLevelRegions, + RegionAndSymbolInvalidationTraits &ITraits); public: RegionStoreManager(ProgramStateManager& mgr, const RegionStoreFeatures &f) @@ -715,8 +716,12 @@ return true; } - bool AddToWorkList(const MemRegion *R) { - const MemRegion *BaseR = R->getBaseRegion(); + bool AddToWorkList(const MemRegion *R, + RegionAndSymbolInvalidationTraits &ITraits) { + bool doNotInvalidateSuperRegion = ITraits.hasTrait( + R, RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); + const MemRegion *BaseR = + doNotInvalidateSuperRegion ? R : R->getBaseRegion(); return AddToWorkList(WorkListElement(BaseR), getCluster(BaseR)); } @@ -971,7 +976,7 @@ IS.insert(Sym); if (const MemRegion *R = V.getAsRegion()) { - AddToWorkList(R); + AddToWorkList(R, ITraits); return; } @@ -1015,7 +1020,7 @@ const VarRegion *VR = BI.getCapturedRegion(); const VarDecl *VD = VR->getDecl(); if (VD->hasAttr<BlocksAttr>() || !VD->hasLocalStorage()) { - AddToWorkList(VR); + AddToWorkList(VR, ITraits); } else if (Loc::isLocType(VR->getValueType())) { // Map the current bindings to a Store to retrieve the value @@ -1026,7 +1031,7 @@ SVal V = RM.getBinding(B, loc::MemRegionVal(VR)); if (Optional<Loc> L = V.getAs<Loc>()) { if (const MemRegion *LR = L->getAsRegion()) - AddToWorkList(LR); + AddToWorkList(LR, ITraits); } } } @@ -1077,6 +1082,43 @@ } if (const ArrayType *AT = Ctx.getAsArrayType(T)) { + bool doNotInvalidateSuperRegion = ITraits.hasTrait( + baseR, + RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); + + if (doNotInvalidateSuperRegion) { + // We are not doing blank invalidation of the whole array region so we + // have to manually invalidate each elements. + Optional<uint64_t> NumElements; + + // Compute lower and upper offsets for region within array. + if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT)) + NumElements = CAT->getSize().getZExtValue(); + if (!NumElements) + return; + QualType ElementTy = AT->getElementType(); + uint64_t ElemSize = Ctx.getTypeSize(ElementTy); + if (!ElemSize) + return; + const RegionOffset &RO = baseR->getAsOffset(); + assert(RO.getOffset() >= 0 && "Offset should not be negative"); + uint64_t LowerOffset = RO.getOffset(); + uint64_t UpperOffset = LowerOffset + *NumElements * ElemSize; + + // Invalidate regions which are within array boundaries. + if (const MemRegion *SuperR = baseR->getBaseRegion()) { + if (const ClusterBindings *C = B.lookup(SuperR)) { + for (ClusterBindings::iterator I = C->begin(), E = C->end(); I != E; + ++I) { + uint64_t ROffset = I.getKey().getOffset(); + if (ROffset < LowerOffset || ROffset >= UpperOffset) + continue; + B = B.removeBinding(I.getKey()); + } + } + } + } + // Set the default value of the array to conjured symbol. DefinedOrUnknownSVal V = svalBuilder.conjureSymbolVal(baseR, Ex, LCtx, @@ -1118,7 +1160,8 @@ void RegionStoreManager::populateWorkList(invalidateRegionsWorker &W, ArrayRef<SVal> Values, - InvalidatedRegions *TopLevelRegions) { + InvalidatedRegions *TopLevelRegions, + RegionAndSymbolInvalidationTraits &ITraits) { for (ArrayRef<SVal>::iterator I = Values.begin(), E = Values.end(); I != E; ++I) { SVal V = *I; @@ -1132,15 +1175,15 @@ // Note: the last argument is false here because these are // non-top-level regions. if (const MemRegion *R = (*I).getAsRegion()) - W.AddToWorkList(R); + W.AddToWorkList(R, ITraits); } continue; } if (const MemRegion *R = V.getAsRegion()) { if (TopLevelRegions) TopLevelRegions->push_back(R); - W.AddToWorkList(R); + W.AddToWorkList(R, ITraits); continue; } } @@ -1174,7 +1217,7 @@ W.GenerateClusters(); // Add the regions to the worklist. - populateWorkList(W, Values, TopLevelRegions); + populateWorkList(W, Values, TopLevelRegions, ITraits); W.RunWorkList(); @@ -2264,14 +2307,15 @@ // If V is a region, then add it to the worklist. if (const MemRegion *R = V.getAsRegion()) { - AddToWorkList(R); + RegionAndSymbolInvalidationTraits ITraits; + AddToWorkList(R, ITraits); // All regions captured by a block are also live. if (const BlockDataRegion *BR = dyn_cast<BlockDataRegion>(R)) { BlockDataRegion::referenced_vars_iterator I = BR->referenced_vars_begin(), E = BR->referenced_vars_end(); for ( ; I != E; ++I) - AddToWorkList(I.getCapturedRegion()); + AddToWorkList(I.getCapturedRegion(), ITraits); } } @@ -2291,7 +2335,8 @@ I = Postponed.begin(), E = Postponed.end() ; I != E ; ++I) { if (const SymbolicRegion *SR = *I) { if (SymReaper.isLive(SR->getSymbol())) { - changed |= AddToWorkList(SR); + RegionAndSymbolInvalidationTraits ITraits; + changed |= AddToWorkList(SR, ITraits); *I = nullptr; } } @@ -2310,7 +2355,8 @@ // Enqueue the region roots onto the worklist. for (SymbolReaper::region_iterator I = SymReaper.region_begin(), E = SymReaper.region_end(); I != E; ++I) { - W.AddToWorkList(*I); + RegionAndSymbolInvalidationTraits ITraits; + W.AddToWorkList(*I, ITraits); } do W.RunWorkList(); while (W.UpdatePostponed()); Index: lib/StaticAnalyzer/Checkers/CStringChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -847,6 +847,14 @@ RegionAndSymbolInvalidationTraits::TK_PreserveContents); ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape); CausesPointerEscape = true; + } else { + if (const FieldRegion *FR = dyn_cast<FieldRegion>(R)) { + // If destination buffer is a field region, do not invalidate the parent + // structure. + ITraits.setTrait( + R, + RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); + } } return state->invalidateRegions(R, E, C.blockCount(), LCtx, Index: include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h =================================================================== --- include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h +++ include/clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h @@ -1333,7 +1333,9 @@ /// Tells that a region's contents is not changed. TK_PreserveContents = 0x1, /// Suppress pointer-escaping of a region. - TK_SuppressEscape = 0x2 + TK_SuppressEscape = 0x2, + // Do not invalidate super region. + TK_DoNotInvalidateSuperRegion = 0x4 // Do not forget to extend StorageTypeForKinds if number of traits exceed // the number of bits StorageTypeForKinds can store.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits