Re: [PATCH] D12821: Allow for C's "writing off the end" idiom in __builtin_object_size
rsmith accepted this revision. rsmith added a comment. This revision is now accepted and ready to land. Some minor typographical comments. Please add some tests for the union case, then this LGTM. Comment at: lib/AST/ExprConstant.cpp:6333-6336 @@ +6332,6 @@ +if (BaseType->isArrayType()) { + // Because __builtin_object_size treats arrays as objects, we can ignore + // the index iff this is the last array in the Designator. + if (I + 1 == E) +return true; + auto *CAT = cast(Ctx.getAsArrayType(BaseType)); OK, but please reflect that this is specific to `__builtin_object_size` in the name or at least doc comment for this function. Comment at: lib/AST/ExprConstant.cpp:6361 @@ +6360,3 @@ + +/// Tests to see if the has a designator (that isn't necessarily valid). +static bool hasDesignator(const LValue ) { This is missing a noun. Comment at: lib/AST/ExprConstant.cpp:6362 @@ +6361,3 @@ +/// Tests to see if the has a designator (that isn't necessarily valid). +static bool hasDesignator(const LValue ) { + if (LVal.Designator.Invalid || !LVal.Designator.Entries.empty()) I think this would be clearer if it were named `hasNonemptyDesignator`... or perhaps reverse the sense of it and name it `refersToCompleteObject` or similar? http://reviews.llvm.org/D12821 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12821: Allow for C's "writing off the end" idiom in __builtin_object_size
george.burgess.iv updated this revision to Diff 36972. george.burgess.iv marked 4 inline comments as done. george.burgess.iv added a comment. Addressed all feedback. Also, updated object-size tests to use CHECK-LABEL instead of CHECK, because yay I’m learning how to do things properly. http://reviews.llvm.org/D12821 Files: lib/AST/ExprConstant.cpp test/CodeGen/object-size.c test/CodeGen/object-size.cpp Index: test/CodeGen/object-size.cpp === --- test/CodeGen/object-size.cpp +++ test/CodeGen/object-size.cpp @@ -28,3 +28,37 @@ // CHECK: store i32 4 gi = __builtin_object_size((char*)(B*), 0); } + +// CHECK-LABEL: define void @_Z5test2v() +void test2() { + struct A { char buf[16]; }; + struct B : A {}; + struct C { int i; B bs[1]; } *c; + + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(>bs[0], 0); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(>bs[0], 1); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true) + gi = __builtin_object_size(>bs[0], 2); + // CHECK: store i32 16 + gi = __builtin_object_size(>bs[0], 3); + + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size((A*)>bs[0], 0); + // CHECK: store i32 16 + gi = __builtin_object_size((A*)>bs[0], 1); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true) + gi = __builtin_object_size((A*)>bs[0], 2); + // CHECK: store i32 16 + gi = __builtin_object_size((A*)>bs[0], 3); + + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(>bs[0].buf[0], 0); + // CHECK: store i32 16 + gi = __builtin_object_size(>bs[0].buf[0], 1); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true) + gi = __builtin_object_size(>bs[0].buf[0], 2); + // CHECK: store i32 16 + gi = __builtin_object_size(>bs[0].buf[0], 3); +} Index: test/CodeGen/object-size.c === --- test/CodeGen/object-size.c +++ test/CodeGen/object-size.c @@ -127,7 +127,7 @@ strcpy(gp += 1, "Hi there"); } -// CHECK: @test17 +// CHECK-LABEL: @test17 void test17() { // CHECK: store i32 -1 gi = __builtin_object_size(gp++, 0); @@ -139,15 +139,15 @@ gi = __builtin_object_size(gp++, 3); } -// CHECK: @test18 +// CHECK-LABEL: @test18 unsigned test18(int cond) { int a[4], b[4]; // CHECK: phi i32* // CHECK: call i64 @llvm.objectsize.i64 return __builtin_object_size(cond ? a : b, 0); } -// CHECK: @test19 +// CHECK-LABEL: @test19 void test19() { struct { int a, b; @@ -172,7 +172,7 @@ gi = __builtin_object_size(, 3); } -// CHECK: @test20 +// CHECK-LABEL: @test20 void test20() { struct { int t[10]; } t[10]; @@ -186,7 +186,7 @@ gi = __builtin_object_size([0].t[5], 3); } -// CHECK: @test21 +// CHECK-LABEL: @test21 void test21() { struct { int t; } t; @@ -209,7 +209,7 @@ gi = __builtin_object_size( + 1, 3); } -// CHECK: @test22 +// CHECK-LABEL: @test22 void test22() { struct { int t[10]; } t[10]; @@ -252,7 +252,7 @@ struct Test23Ty { int a; int t[10]; }; -// CHECK: @test23 +// CHECK-LABEL: @test23 void test23(struct Test23Ty *p) { // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) gi = __builtin_object_size(p, 0); @@ -285,7 +285,7 @@ } // PR24493 -- ICE if __builtin_object_size called with NULL and (Type & 1) != 0 -// CHECK: @test24 +// CHECK-LABEL: @test24 void test24() { // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false) gi = __builtin_object_size((void*)0, 0); @@ -299,7 +299,7 @@ gi = __builtin_object_size((void*)0, 3); } -// CHECK: @test25 +// CHECK-LABEL: @test25 void test25() { // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* {{.*}}, i1 false) gi = __builtin_object_size((void*)0x1000, 0); @@ -324,7 +324,7 @@ gi = __builtin_object_size((void*)0 + 0x1000, 3); } -// CHECK: @test26 +// CHECK-LABEL: @test26 void test26() { struct { int v[10]; } t[10]; @@ -340,7 +340,7 @@ struct Test27IncompleteTy; -// CHECK: @test27 +// CHECK-LABEL: @test27 void test27(struct Test27IncompleteTy *t) { // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) gi = __builtin_object_size(t, 0); @@ -367,7 +367,7 @@ // The intent of this test is to ensure that __builtin_object_size treats `` // and `(T*)` identically, when used as the pointer argument. -// CHECK: @test28 +// CHECK-LABEL: @test28 void test28() { struct { int v[10]; } t[10]; @@ -391,3 +391,129 @@ gi = __builtin_object_size(addCasts([1].v[1]), 3); #undef addCasts } + +struct DynStructVar { + char fst[16]; + char snd[]; +}; + +struct DynStruct0 { + char fst[16]; + char snd[0]; +}; + +struct DynStruct1 { + char fst[16]; + char snd[1]; +}; + +struct StaticStruct { + char fst[16]; + char snd[2]; +};
Re: [PATCH] D12821: Allow for C's "writing off the end" idiom in __builtin_object_size
rsmith added inline comments. Comment at: lib/AST/ExprConstant.cpp:6311-6314 @@ +6310,6 @@ + QualType BaseType; + if (auto *E = Base.dyn_cast()) +BaseType = E->getType(); + else +BaseType = Base.get()->getType(); + // The outermost BaseType may be a pointer if we got an expression like `BaseType = getType(Base);` Comment at: lib/AST/ExprConstant.cpp:6315-6318 @@ +6314,6 @@ +BaseType = Base.get()->getType(); + // The outermost BaseType may be a pointer if we got an expression like + // `Foo->Bar`. + if (BaseType->isPointerType()) +BaseType = BaseType->getPointeeType(); + This seems like the wrong way to handle this case (we'll be computing the wrong type for such `LValue`s in other cases). In `LValueExprEvaluatorBase::VisitMemberExpr`, we should use Result.setInvalid(E); return false; instead of Result.setInvalid(E->getBase()); // fall through to perform member access I thought we'd already made that change, but it appears not. Comment at: lib/AST/ExprConstant.cpp:6331-6333 @@ +6330,5 @@ + BaseType = CAT->getElementType(); +} else if (BaseType->isAnyComplexType()) { + auto *CT = BaseType->castAs(); + BaseType = CT->getElementType(); +} else if (auto *FD = getAsField(LVal.Designator.Entries[I])) { You should check that the `ArrayIndex` is 1 in this case (a pointer to the `__real__` component doesn't point to the end of the object). Comment at: lib/AST/ExprConstant.cpp:6343-6348 @@ +6342,8 @@ +} else { + // Don't update BaseType because we don't want to answer 'true' in: + // struct Base { char Foo[1]; }; + // struct Derived : public Base { char Bar[1]; } d; + // __builtin_object_size(((Base*))->Foo, 1) + assert(getAsBaseClass(LVal.Designator.Entries[I]) != nullptr && + "Expecting cast to a base class"); +} Not updating `BaseType` will cause odd things to happen on the later iterations of this loop. You should probably just `return false` here. http://reviews.llvm.org/D12821 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12821: Allow for C's "writing off the end" idiom in __builtin_object_size
mzolotukhin added a comment. FWIW, I'm really interested in seeing this patch in! Thanks, Michael http://reviews.llvm.org/D12821 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12821: Allow for C's "writing off the end" idiom in __builtin_object_size
george.burgess.iv added a comment. Friendly ping :) http://reviews.llvm.org/D12821 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12821: Allow for C's "writing off the end" idiom in __builtin_object_size
george.burgess.iv updated this revision to Diff 34710. george.burgess.iv marked 4 inline comments as done. george.burgess.iv added a comment. Addressed all feedback -- added a walk of the Designator as suggested. Regarding `isDesignatorAtObjectEnd`: I'm assuming that the Index returned by `FieldDescriptor::getFieldIndex` has some bearing on the actual position of the field in its parent. Specifically, that if `FD->getFieldIndex() + 1 == numFieldsIn(FD->getParent())`, then `FD` is the last field in its parent. I can't find documentation on this guarantee though, so I'm happy to swap to walking the offsets of each field in `FD->getParent()` if we feel that would be a more robust approach. http://reviews.llvm.org/D12821 Files: lib/AST/ExprConstant.cpp test/CodeGen/object-size.c test/CodeGen/object-size.cpp Index: test/CodeGen/object-size.cpp === --- test/CodeGen/object-size.cpp +++ test/CodeGen/object-size.cpp @@ -28,3 +28,37 @@ // CHECK: store i32 4 gi = __builtin_object_size((char*)(B*), 0); } + +// CHECK-LABEL: define void @_Z5test2v() +void test2() { + struct A { char buf[16]; }; + struct B : A {}; + struct C { int i; B bs[1]; } *c; + + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(>bs[0], 0); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(>bs[0], 1); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true) + gi = __builtin_object_size(>bs[0], 2); + // CHECK: store i32 16 + gi = __builtin_object_size(>bs[0], 3); + + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size((A*)>bs[0], 0); + // CHECK: store i32 16 + gi = __builtin_object_size((A*)>bs[0], 1); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true) + gi = __builtin_object_size((A*)>bs[0], 2); + // CHECK: store i32 16 + gi = __builtin_object_size((A*)>bs[0], 3); + + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(>bs[0].buf[0], 0); + // CHECK: store i32 16 + gi = __builtin_object_size(>bs[0].buf[0], 1); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true) + gi = __builtin_object_size(>bs[0].buf[0], 2); + // CHECK: store i32 16 + gi = __builtin_object_size(>bs[0].buf[0], 3); +} Index: test/CodeGen/object-size.c === --- test/CodeGen/object-size.c +++ test/CodeGen/object-size.c @@ -391,3 +391,105 @@ gi = __builtin_object_size(addCasts([1].v[1]), 3); #undef addCasts } + +struct DynStructVar { + char fst[16]; + char snd[]; +}; + +struct DynStruct0 { + char fst[16]; + char snd[0]; +}; + +struct DynStruct1 { + char fst[16]; + char snd[1]; +}; + +struct StaticStruct { + char fst[16]; + char snd[2]; +}; + +// CHECK-LABEL: @test29 +void test29(struct DynStructVar *dv, struct DynStruct0 *d0, +struct DynStruct1 *d1, struct StaticStruct *ss) { + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(dv->snd, 0); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(dv->snd, 1); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true) + gi = __builtin_object_size(dv->snd, 2); + // CHECK: store i32 0 + gi = __builtin_object_size(dv->snd, 3); + + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(d0->snd, 0); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(d0->snd, 1); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true) + gi = __builtin_object_size(d0->snd, 2); + // CHECK: store i32 0 + gi = __builtin_object_size(d0->snd, 3); + + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(d1->snd, 0); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(d1->snd, 1); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true) + gi = __builtin_object_size(d1->snd, 2); + // CHECK: store i32 1 + gi = __builtin_object_size(d1->snd, 3); + + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(ss->snd, 0); + // CHECK: store i32 2 + gi = __builtin_object_size(ss->snd, 1); + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 true) + gi = __builtin_object_size(ss->snd, 2); + // CHECK: store i32 2 + gi = __builtin_object_size(ss->snd, 3); +} + +// CHECK: @test30 +void test30() { + struct { struct DynStruct1 fst, snd; } *nested; + + // CHECK: call i64 @llvm.objectsize.i64.p0i8(i8* %{{.*}}, i1 false) + gi = __builtin_object_size(nested->fst.snd, 0); + // CHECK: store i32 1 + gi = __builtin_object_size(nested->fst.snd, 1); + // CHECK: call i64
Re: [PATCH] D12821: Allow for C's "writing off the end" idiom in __builtin_object_size
george.burgess.iv added inline comments. Comment at: lib/AST/ExprConstant.cpp:4457-4460 @@ -4434,1 +4456,6 @@ +// Because we set the Base to be the MemberExpr instead of E->getBase(), the +// Offset should be from the MemberExpr instead of the MemberExpr's base. +if (Result.InvalidBase) + Result.Offset = CharUnits::Zero(); + rsmith wrote: > I think you should bail out above, with the base set to the `MemberExpr` and > with an empty designator, rather than applying a member designator on top of > a base that already refers to the member (and then needing to undo the effect > on the offset here). With the new approach, this change isn't even necessary anymore :) Comment at: lib/AST/ExprConstant.cpp:6402 @@ +6401,3 @@ + // that behaves this way. + if (End.InvalidBase && (Type & 1) != 0 && + End.Designator.MostDerivedIsArrayElement && rsmith wrote: > This only seems necessary when `Type` is 1; for type 3, returning 1 would be > conservatively correct as a lower bound on the known-accessible storage, and > is better than giving up here and evaluating the object size as 0. Good catch Comment at: lib/AST/ExprConstant.cpp:6403-6404 @@ +6402,4 @@ + if (End.InvalidBase && (Type & 1) != 0 && + End.Designator.MostDerivedIsArrayElement && + End.Designator.MostDerivedArraySize < 2) { +// EM_FoldDesignator requires that all invalid bases be MemberExprs rsmith wrote: > Ideally, we should walk the designator and make sure that at each step we > picked the last subobject (last array element, last field, ...) -- that is, > we want to know that we're really looking at a trailing flexible array > member, and not just a size-1 array in the middle of some object. > > You should also check that the designator refers to the most-derived object > (that is, that `Entries.size() == MostDerivedPathLength`), because given: > > struct A { char buffer[10]; }; > struct B : A {}; > struct C { B b[1]; } *c; > int m = __builtin_object_size(c->b[0], 1); > int n = __builtin_object_size((A*)c->b[0], 1); > > ... we should presumably compute `m == -1` but `n == 10`, because for `n` the > `A` subobject of the `B` object is known to have size 10, even though we're > looking at a base subobject of a size-1 trailing array. > You should also check that the designator refers to the most-derived object > (that is, that Entries.size() == MostDerivedPathLength), because given That's a fun case! Thanks for catching that. > Ideally, we should walk the designator and make sure that at each step we > picked the last subobject (last array element, last field, ...) -- that is, > we want to know that we're really looking at a trailing flexible array > member, and not just a size-1 array in the middle of some object That's the goal of the AtEndOfObject check below. :) I guess it would fail with things like `union { char c[1]; int a; };` though. Added. http://reviews.llvm.org/D12821 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D12821: Allow for C's "writing off the end" idiom in __builtin_object_size
rsmith added inline comments. Comment at: lib/AST/ExprConstant.cpp:165 @@ -159,1 +164,3 @@ +/// Indicator of whether the most-derived object is an array. +bool MostDerivedIsArrayElement : 1; array -> array element. Comment at: lib/AST/ExprConstant.cpp:4457-4460 @@ -4434,1 +4456,6 @@ +// Because we set the Base to be the MemberExpr instead of E->getBase(), the +// Offset should be from the MemberExpr instead of the MemberExpr's base. +if (Result.InvalidBase) + Result.Offset = CharUnits::Zero(); + I think you should bail out above, with the base set to the `MemberExpr` and with an empty designator, rather than applying a member designator on top of a base that already refers to the member (and then needing to undo the effect on the offset here). Comment at: lib/AST/ExprConstant.cpp:6402 @@ +6401,3 @@ + // that behaves this way. + if (End.InvalidBase && (Type & 1) != 0 && + End.Designator.MostDerivedIsArrayElement && This only seems necessary when `Type` is 1; for type 3, returning 1 would be conservatively correct as a lower bound on the known-accessible storage, and is better than giving up here and evaluating the object size as 0. Comment at: lib/AST/ExprConstant.cpp:6403-6404 @@ +6402,4 @@ + if (End.InvalidBase && (Type & 1) != 0 && + End.Designator.MostDerivedIsArrayElement && + End.Designator.MostDerivedArraySize < 2) { +// EM_FoldDesignator requires that all invalid bases be MemberExprs Ideally, we should walk the designator and make sure that at each step we picked the last subobject (last array element, last field, ...) -- that is, we want to know that we're really looking at a trailing flexible array member, and not just a size-1 array in the middle of some object. You should also check that the designator refers to the most-derived object (that is, that `Entries.size() == MostDerivedPathLength`), because given: struct A { char buffer[10]; }; struct B : A {}; struct C { B b[1]; } *c; int m = __builtin_object_size(c->b[0], 1); int n = __builtin_object_size((A*)c->b[0], 1); ... we should presumably compute `m == -1` but `n == 10`, because for `n` the `A` subobject of the `B` object is known to have size 10, even though we're looking at a base subobject of a size-1 trailing array. http://reviews.llvm.org/D12821 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits