[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
This revision was automatically updated to reflect the committed changes. Closed by commit rL345298: Implement Function Multiversioning for Non-ELF Systems. (authored by erichkeane, committed by ). Herald added a subscriber: llvm-commits. Changed prior to commit: https://reviews.llvm.org/D53586?vs=171118&id=171160#toc Repository: rL LLVM https://reviews.llvm.org/D53586 Files: cfe/trunk/include/clang/AST/Decl.h cfe/trunk/include/clang/Basic/Attr.td cfe/trunk/include/clang/Basic/TargetInfo.h cfe/trunk/lib/AST/Decl.cpp cfe/trunk/lib/Basic/Targets/X86.h cfe/trunk/lib/CodeGen/CodeGenFunction.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/lib/CodeGen/CodeGenModule.h cfe/trunk/test/CodeGen/attr-cpuspecific.c cfe/trunk/test/CodeGen/attr-target-mv-func-ptrs.c cfe/trunk/test/CodeGen/attr-target-mv-va-args.c cfe/trunk/test/CodeGen/attr-target-mv.c cfe/trunk/test/CodeGenCXX/attr-target-mv-diff-ns.cpp cfe/trunk/test/CodeGenCXX/attr-target-mv-func-ptrs.cpp cfe/trunk/test/CodeGenCXX/attr-target-mv-inalloca.cpp cfe/trunk/test/CodeGenCXX/attr-target-mv-member-funcs.cpp cfe/trunk/test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp cfe/trunk/test/CodeGenCXX/attr-target-mv-overloads.cpp cfe/trunk/test/Sema/attr-target-mv-bad-target.c Index: cfe/trunk/include/clang/AST/Decl.h === --- cfe/trunk/include/clang/AST/Decl.h +++ cfe/trunk/include/clang/AST/Decl.h @@ -2233,6 +2233,10 @@ /// part of the cpu_specific/cpu_dispatch functionality. bool isCPUSpecificMultiVersion() const; + /// True if this function is a multiversioned dispatch function as a part of + /// the target functionality. + bool isTargetMultiVersion() const; + void setPreviousDeclaration(FunctionDecl * PrevDecl); FunctionDecl *getCanonicalDecl() override; Index: cfe/trunk/include/clang/Basic/TargetInfo.h === --- cfe/trunk/include/clang/Basic/TargetInfo.h +++ cfe/trunk/include/clang/Basic/TargetInfo.h @@ -1082,9 +1082,15 @@ return false; } - /// Identify whether this taret supports multiversioning of functions, + /// Identify whether this target supports multiversioning of functions, /// which requires support for cpu_supports and cpu_is functionality. - virtual bool supportsMultiVersioning() const { return false; } + bool supportsMultiVersioning() const { +return getTriple().getArch() == llvm::Triple::x86 || + getTriple().getArch() == llvm::Triple::x86_64; + } + + /// Identify whether this target supports IFuncs. + bool supportsIFunc() const { return getTriple().isOSBinFormatELF(); } // Validate the contents of the __builtin_cpu_supports(const char*) // argument. Index: cfe/trunk/include/clang/Basic/Attr.td === --- cfe/trunk/include/clang/Basic/Attr.td +++ cfe/trunk/include/clang/Basic/Attr.td @@ -858,7 +858,7 @@ } def CPUSpecific : InheritableAttr { - let Spellings = [Clang<"cpu_specific">]; + let Spellings = [Clang<"cpu_specific">, Declspec<"cpu_specific">]; let Args = [VariadicIdentifierArgument<"Cpus">]; let Subjects = SubjectList<[Function]>; let Documentation = [CPUSpecificCPUDispatchDocs]; @@ -872,7 +872,7 @@ } def CPUDispatch : InheritableAttr { - let Spellings = [Clang<"cpu_dispatch">]; + let Spellings = [Clang<"cpu_dispatch">, Declspec<"cpu_dispatch">]; let Args = [VariadicIdentifierArgument<"Cpus">]; let Subjects = SubjectList<[Function]>; let Documentation = [CPUSpecificCPUDispatchDocs]; Index: cfe/trunk/test/CodeGen/attr-target-mv-va-args.c === --- cfe/trunk/test/CodeGen/attr-target-mv-va-args.c +++ cfe/trunk/test/CodeGen/attr-target-mv-va-args.c @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS int __attribute__((target("sse4.2"))) foo(int i, ...) { return 0; } int __attribute__((target("arch=sandybridge"))) foo(int i, ...); int __attribute__((target("arch=ivybridge"))) foo(int i, ...) {return 1;} @@ -8,19 +9,37 @@ return foo(1, 'a', 1.1) + foo(2, 2.2, "asdf"); } -// CHECK: @foo.ifunc = ifunc i32 (i32, ...), i32 (i32, ...)* ()* @foo.resolver -// CHECK: define i32 @foo.sse4.2(i32 %i, ...) -// CHECK: ret i32 0 -// CHECK: define i32 @foo.arch_ivybridge(i32 %i, ...) -// CHECK: ret i32 1 -// CHECK: define i32 @foo(i32 %i, ...) -// CHECK: ret i32 2 -// CHECK: define i32 @bar() -// CHECK: call i32 (i32, ...) @foo.ifunc(i32 1, i32 97, double -// CHECK: call i32 (i32, ...) @foo.ifunc(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds -// CHECK: define i32 (i32, ...
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
erichkeane added a comment. In https://reviews.llvm.org/D53586#1276198, @rnk wrote: > Here's a thought. What happens if different TUs observe different overload > sets, perhaps because of ifdefs? Different TUs will generate different > resolvers, but they won't dispatch to the same sets of targets. I'm guessing > we'd treat that as an ODR violation, no diagnostic required? > > Anyway, I think this is ready to commit. Thanks! Yep, pretty much. Its a bit of a weakness of the 'target' mv (since dispatch requires you to explicitly list), but I believe it is a trade off that gcc felt was acceptable.x`x` https://reviews.llvm.org/D53586 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
rnk accepted this revision. rnk added a comment. Here's a thought. What happens if different TUs observe different overload sets, perhaps because of ifdefs? Different TUs will generate different resolvers, but they won't dispatch to the same sets of targets. I'm guessing we'd treat that as an ODR violation, no diagnostic required? Anyway, I think this is ready to commit. Thanks! Comment at: test/CodeGen/attr-target-mv-va-args.c:41-44 +// WINDOWS: call i32 (i32, ...) @foo.arch_sandybridge +// WINDOWS: call i32 (i32, ...) @foo.arch_ivybridge +// WINDOWS: call i32 (i32, ...) @foo.sse4.2 +// WINDOWS: call i32 (i32, ...) @foo Probably worth matching out the musttail for variadics, since the ellipsis doesn't make sense without it. Comment at: test/CodeGenCXX/attr-target-mv-inalloca.cpp:45-47 +// WINDOWS: musttail call i32 @"?bar@@YAHUFoo@@@Z.arch_ivybridge"(<{ %struct.Foo }>* %0) +// WINDOWS: musttail call i32 @"?bar@@YAHUFoo@@@Z.sse4.2"(<{ %struct.Foo }>* %0) +// WINDOWS: musttail call i32 @"?bar@@YAHUFoo@@@Z"(<{ %struct.Foo }>* %0) I think it's worth matching the retval and doing CHECK-NEXT for the ret, since that will check that the call is in fact in tail position, which is a verifier requirement for musttail. https://reviews.llvm.org/D53586 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
erichkeane updated this revision to Diff 171118. erichkeane added a comment. ACTUALLY add the test this time :/ sorry about that! https://reviews.llvm.org/D53586 Files: include/clang/AST/Decl.h include/clang/Basic/Attr.td include/clang/Basic/TargetInfo.h lib/AST/Decl.cpp lib/Basic/Targets/X86.h lib/CodeGen/CodeGenFunction.cpp lib/CodeGen/CodeGenFunction.h lib/CodeGen/CodeGenModule.cpp lib/CodeGen/CodeGenModule.h test/CodeGen/attr-cpuspecific.c test/CodeGen/attr-target-mv-func-ptrs.c test/CodeGen/attr-target-mv-va-args.c test/CodeGen/attr-target-mv.c test/CodeGenCXX/attr-target-mv-diff-ns.cpp test/CodeGenCXX/attr-target-mv-func-ptrs.cpp test/CodeGenCXX/attr-target-mv-inalloca.cpp test/CodeGenCXX/attr-target-mv-member-funcs.cpp test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp test/CodeGenCXX/attr-target-mv-overloads.cpp test/Sema/attr-target-mv-bad-target.c Index: test/Sema/attr-target-mv-bad-target.c === --- test/Sema/attr-target-mv-bad-target.c +++ test/Sema/attr-target-mv-bad-target.c @@ -1,4 +1,3 @@ -// RUN: %clang_cc1 -triple x86_64-windows-pc -fsyntax-only -verify %s // RUN: %clang_cc1 -triple arm-none-eabi -fsyntax-only -verify %s int __attribute__((target("sse4.2"))) redecl1(void) { return 1; } Index: test/CodeGenCXX/attr-target-mv-overloads.cpp === --- test/CodeGenCXX/attr-target-mv-overloads.cpp +++ test/CodeGenCXX/attr-target-mv-overloads.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; } int __attribute__((target("arch=sandybridge"))) foo_overload(int); @@ -13,38 +14,69 @@ return foo_overload() + foo_overload(1); } -// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver -// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver - - -// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32) -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadi(i32) -// CHECK: ret i32 2 -// CHECK: define i32 @_Z12foo_overloadv.sse4.2() -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge() -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadv() -// CHECK: ret i32 2 - -// CHECK: define i32 @_Z4bar2v() -// CHECK: call i32 @_Z12foo_overloadv.ifunc() -// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1) - -// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat -// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2 -// CHECK: ret i32 ()* @_Z12foo_overloadv - -// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2 -// CHECK: ret i32 (i32)* @_Z12foo_overloadi - -// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge() -// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32) +// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver +// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver + +// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32) +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadi(i32) +// LINUX: ret i32 2 +// LINUX: define i32 @_Z12foo_overloadv.sse4.2() +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge() +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadv() +// LINUX: ret i32 2 + +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.sse4.2"(i32) +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"(i32) +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z"(i32) +// WINDOWS: ret i32 2 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.sse4.2"() +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.arch_ivybridge"() +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ"() +// WINDOWS: ret i32 2 + +// LINUX: define i32 @_Z4bar2v() +// LINUX: call i32 @_Z12foo_overloadv.ifunc() +// LINUX: call i32 @_Z12foo_overloadi.ifunc(i32 1) + +// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"() +// WINDOWS: call i32 @"?foo_overload@@YAHXZ.resolver"() +// WINDOWS: call
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
erichkeane updated this revision to Diff 171088. erichkeane marked an inline comment as done. erichkeane added a comment. Added test as requested by @rnk. How's it look? I hope I got the balance of check-lines right. https://reviews.llvm.org/D53586 Files: include/clang/AST/Decl.h include/clang/Basic/Attr.td include/clang/Basic/TargetInfo.h lib/AST/Decl.cpp lib/Basic/Targets/X86.h lib/CodeGen/CodeGenFunction.cpp lib/CodeGen/CodeGenFunction.h lib/CodeGen/CodeGenModule.cpp lib/CodeGen/CodeGenModule.h test/CodeGen/attr-cpuspecific.c test/CodeGen/attr-target-mv-func-ptrs.c test/CodeGen/attr-target-mv-va-args.c test/CodeGen/attr-target-mv.c test/CodeGenCXX/attr-target-mv-diff-ns.cpp test/CodeGenCXX/attr-target-mv-func-ptrs.cpp test/CodeGenCXX/attr-target-mv-member-funcs.cpp test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp test/CodeGenCXX/attr-target-mv-overloads.cpp test/Sema/attr-target-mv-bad-target.c Index: test/Sema/attr-target-mv-bad-target.c === --- test/Sema/attr-target-mv-bad-target.c +++ test/Sema/attr-target-mv-bad-target.c @@ -1,4 +1,3 @@ -// RUN: %clang_cc1 -triple x86_64-windows-pc -fsyntax-only -verify %s // RUN: %clang_cc1 -triple arm-none-eabi -fsyntax-only -verify %s int __attribute__((target("sse4.2"))) redecl1(void) { return 1; } Index: test/CodeGenCXX/attr-target-mv-overloads.cpp === --- test/CodeGenCXX/attr-target-mv-overloads.cpp +++ test/CodeGenCXX/attr-target-mv-overloads.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; } int __attribute__((target("arch=sandybridge"))) foo_overload(int); @@ -13,38 +14,69 @@ return foo_overload() + foo_overload(1); } -// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver -// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver - - -// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32) -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadi(i32) -// CHECK: ret i32 2 -// CHECK: define i32 @_Z12foo_overloadv.sse4.2() -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge() -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadv() -// CHECK: ret i32 2 - -// CHECK: define i32 @_Z4bar2v() -// CHECK: call i32 @_Z12foo_overloadv.ifunc() -// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1) - -// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat -// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2 -// CHECK: ret i32 ()* @_Z12foo_overloadv - -// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2 -// CHECK: ret i32 (i32)* @_Z12foo_overloadi - -// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge() -// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32) +// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver +// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver + +// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32) +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadi(i32) +// LINUX: ret i32 2 +// LINUX: define i32 @_Z12foo_overloadv.sse4.2() +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge() +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadv() +// LINUX: ret i32 2 + +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.sse4.2"(i32) +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"(i32) +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z"(i32) +// WINDOWS: ret i32 2 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.sse4.2"() +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.arch_ivybridge"() +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ"() +// WINDOWS: ret i32 2 + +// LINUX: define i32 @_Z4bar2v() +// LINUX: call i32 @_Z12foo_overloadv.ifunc() +// LINUX: call i32 @_Z12foo_overloadi.ifunc(i32 1) + +// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"() +// WINDOWS: call i32 @"?foo_ove
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
rnk added inline comments. Comment at: lib/CodeGen/CodeGenFunction.cpp:2395 + + llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args); + rnk wrote: > erichkeane wrote: > > erichkeane wrote: > > > rnk wrote: > > > > This approach is... not going to work in the general case. Consider, > > > > for example, varargs. Then consider 32-bit x86 inalloca, which is quite > > > > widespread. Any non-trivially copyable object is not going to be > > > > passable through such a thunk. You might consider looking at > > > > CodeGenFunction::EmitMustTailThunk instead. > > > Oh dear... > > > I'm unfamiliar with EmitMustTailThunk, is it self explanatory? Any > > > chance you can expound? Should I call/use that function, or copy out of > > > it? > > I looked through that function, and it seems to do very similar things to > > this... > > > > It FIRST does the small-vector conversion to a value* array like I do. > > > > Second, it 'adjusts' the 'this' parameter. This doesn't seem like it needs > > to happen, because the type isn't changing, right? my 'this' pointer is > > still pass-on-able. > > > > Third, it creates the call, then marks it TCK_MustTail. > > > > Forth, it sets the attributes/calling convention of the 'call' object, but > > I'd expect that to come from the llvm::Function object in the 'CreateCall', > > right? I can add them after the fact I guess, if we see value.*(see below) > > > > Finally, it does the CreateRetVoid/CreateRet like this function does below. > > > > *I DO have a test that claims that my IR is broken if I set the call > > attributes. Curiously only 1 of my tests, but the error is: > > "cannot guarantee tail call due to mismatched ABI impacting function > > attributes" > > > > The only difference in attributes is target-features and target-cpu though, > > so I don't know what causes this. > Right, the this-adjustment is definitely unnecessary for this kind of CPU > dispatch thunk. > > The important thing is that marking the call as `musttail` ensures that you > get perfect forwarding of all arguments, even varargs and inalloca. Maybe > it's enough to just mark the call you are creating that way. I think the code > you are generating obeys all the musttail invariants, so that might be best. > > The "cannot guarantee tail call due to mismatched ABI impacting function > attributes" verifier check is only supposed to fire when parameter attributes > like `byval` and `inreg` differ between the call site and the dispatcher > function. It shouldn't care about target-cpu or target-features. It's probably worth adding a test that uses inalloca, since that's why we're doing this nonsense. It would look like: ``` struct Foo { Foo(); Foo(const Foo &o); ~Foo(); int x; }; int __attribute__((target("default"))) bar(Foo o) { return o.x; } int __attribute__((target("sse4.2"))) bar(Foo o) { return o.x + 1; } int __attribute__((target("arch=ivybridge"))) bar(Foo o) { return o.x + 2; } ``` Targetting i686-windows-msvc. https://reviews.llvm.org/D53586 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
echristo accepted this revision. echristo added a comment. This revision is now accepted and ready to land. All of the target specific stuff looks fine to me. I'm going to defer to rnk about the windows side of things and aaron for the attributes. https://reviews.llvm.org/D53586 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
erichkeane updated this revision to Diff 170987. erichkeane added a comment. did everything suggested by @rnk as far as I know. Thanks again for the reviews! https://reviews.llvm.org/D53586 Files: include/clang/AST/Decl.h include/clang/Basic/Attr.td include/clang/Basic/TargetInfo.h lib/AST/Decl.cpp lib/Basic/Targets/X86.h lib/CodeGen/CodeGenFunction.cpp lib/CodeGen/CodeGenFunction.h lib/CodeGen/CodeGenModule.cpp lib/CodeGen/CodeGenModule.h test/CodeGen/attr-cpuspecific.c test/CodeGen/attr-target-mv-func-ptrs.c test/CodeGen/attr-target-mv-va-args.c test/CodeGen/attr-target-mv.c test/CodeGenCXX/attr-target-mv-diff-ns.cpp test/CodeGenCXX/attr-target-mv-func-ptrs.cpp test/CodeGenCXX/attr-target-mv-member-funcs.cpp test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp test/CodeGenCXX/attr-target-mv-overloads.cpp test/Sema/attr-target-mv-bad-target.c Index: test/Sema/attr-target-mv-bad-target.c === --- test/Sema/attr-target-mv-bad-target.c +++ test/Sema/attr-target-mv-bad-target.c @@ -1,4 +1,3 @@ -// RUN: %clang_cc1 -triple x86_64-windows-pc -fsyntax-only -verify %s // RUN: %clang_cc1 -triple arm-none-eabi -fsyntax-only -verify %s int __attribute__((target("sse4.2"))) redecl1(void) { return 1; } Index: test/CodeGenCXX/attr-target-mv-overloads.cpp === --- test/CodeGenCXX/attr-target-mv-overloads.cpp +++ test/CodeGenCXX/attr-target-mv-overloads.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; } int __attribute__((target("arch=sandybridge"))) foo_overload(int); @@ -13,38 +14,69 @@ return foo_overload() + foo_overload(1); } -// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver -// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver - - -// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32) -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadi(i32) -// CHECK: ret i32 2 -// CHECK: define i32 @_Z12foo_overloadv.sse4.2() -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge() -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadv() -// CHECK: ret i32 2 - -// CHECK: define i32 @_Z4bar2v() -// CHECK: call i32 @_Z12foo_overloadv.ifunc() -// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1) - -// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat -// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2 -// CHECK: ret i32 ()* @_Z12foo_overloadv - -// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2 -// CHECK: ret i32 (i32)* @_Z12foo_overloadi - -// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge() -// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32) +// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver +// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver + +// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32) +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadi(i32) +// LINUX: ret i32 2 +// LINUX: define i32 @_Z12foo_overloadv.sse4.2() +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge() +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadv() +// LINUX: ret i32 2 + +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.sse4.2"(i32) +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"(i32) +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z"(i32) +// WINDOWS: ret i32 2 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.sse4.2"() +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.arch_ivybridge"() +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ"() +// WINDOWS: ret i32 2 + +// LINUX: define i32 @_Z4bar2v() +// LINUX: call i32 @_Z12foo_overloadv.ifunc() +// LINUX: call i32 @_Z12foo_overloadi.ifunc(i32 1) + +// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"() +// WINDOWS: call i32 @"?foo_overload@@YAHXZ.resolver"() +// WINDOWS: call i32 @"?foo_overlo
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
rnk added inline comments. Comment at: lib/CodeGen/CodeGenFunction.cpp:2391-2393 + llvm::SmallVector Args; + llvm::for_each(Resolver->args(), + [&](llvm::Argument &Arg) { Args.push_back(&Arg); }); erichkeane wrote: > rnk wrote: > > Surely this would be simpler as a range for or `Args{Resolver->arg_begin() > > ...};` > Surely... Unfortunately the types don't match. Resolver Args are an array of > Arguments (so the iterator is a Arg*). > > HOWEVER, "Args" here needs to be an array ref to Value*, so the iterator > would have to be a Value** (not a Arg*/Value*). The point of this small > vector is to create an array of _pointers_ from the array of Arguments. Right. :) Comment at: lib/CodeGen/CodeGenFunction.cpp:2395 + + llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args); + erichkeane wrote: > erichkeane wrote: > > rnk wrote: > > > This approach is... not going to work in the general case. Consider, for > > > example, varargs. Then consider 32-bit x86 inalloca, which is quite > > > widespread. Any non-trivially copyable object is not going to be passable > > > through such a thunk. You might consider looking at > > > CodeGenFunction::EmitMustTailThunk instead. > > Oh dear... > > I'm unfamiliar with EmitMustTailThunk, is it self explanatory? Any chance > > you can expound? Should I call/use that function, or copy out of it? > I looked through that function, and it seems to do very similar things to > this... > > It FIRST does the small-vector conversion to a value* array like I do. > > Second, it 'adjusts' the 'this' parameter. This doesn't seem like it needs > to happen, because the type isn't changing, right? my 'this' pointer is > still pass-on-able. > > Third, it creates the call, then marks it TCK_MustTail. > > Forth, it sets the attributes/calling convention of the 'call' object, but > I'd expect that to come from the llvm::Function object in the 'CreateCall', > right? I can add them after the fact I guess, if we see value.*(see below) > > Finally, it does the CreateRetVoid/CreateRet like this function does below. > > *I DO have a test that claims that my IR is broken if I set the call > attributes. Curiously only 1 of my tests, but the error is: > "cannot guarantee tail call due to mismatched ABI impacting function > attributes" > > The only difference in attributes is target-features and target-cpu though, > so I don't know what causes this. Right, the this-adjustment is definitely unnecessary for this kind of CPU dispatch thunk. The important thing is that marking the call as `musttail` ensures that you get perfect forwarding of all arguments, even varargs and inalloca. Maybe it's enough to just mark the call you are creating that way. I think the code you are generating obeys all the musttail invariants, so that might be best. The "cannot guarantee tail call due to mismatched ABI impacting function attributes" verifier check is only supposed to fire when parameter attributes like `byval` and `inreg` differ between the call site and the dispatcher function. It shouldn't care about target-cpu or target-features. Comment at: test/CodeGenCXX/attr-target-mv-overloads.cpp:74 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.resolver"(i32) comdat +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_ivybridge" erichkeane wrote: > rnk wrote: > > Does dumpbin demangle these names correctly? Just curious. > I don't know, I'd suspect not. However, demangler.com (not sure what it > uses?) seems to demangle them ignoring everything after the '.'. Neat. Repository: rC Clang https://reviews.llvm.org/D53586 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
erichkeane marked an inline comment as done. erichkeane added a comment. @rnk: I looked into the EmitMustTailThunk code, and don't terribly see what I'm doing wrong. I clearly cannot CALL that function (since this works with non-CXXMethodDecls), so I looked into using it. However, I saw that it does a couple of small things that seem useful, but not anything that seems to fix your concerns. Can you espouse a bit for me? I'm likely missing something, but am out of ideas on what it could be. Comment at: lib/CodeGen/CodeGenFunction.cpp:2395 + + llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args); + erichkeane wrote: > rnk wrote: > > This approach is... not going to work in the general case. Consider, for > > example, varargs. Then consider 32-bit x86 inalloca, which is quite > > widespread. Any non-trivially copyable object is not going to be passable > > through such a thunk. You might consider looking at > > CodeGenFunction::EmitMustTailThunk instead. > Oh dear... > I'm unfamiliar with EmitMustTailThunk, is it self explanatory? Any chance > you can expound? Should I call/use that function, or copy out of it? I looked through that function, and it seems to do very similar things to this... It FIRST does the small-vector conversion to a value* array like I do. Second, it 'adjusts' the 'this' parameter. This doesn't seem like it needs to happen, because the type isn't changing, right? my 'this' pointer is still pass-on-able. Third, it creates the call, then marks it TCK_MustTail. Forth, it sets the attributes/calling convention of the 'call' object, but I'd expect that to come from the llvm::Function object in the 'CreateCall', right? I can add them after the fact I guess, if we see value.*(see below) Finally, it does the CreateRetVoid/CreateRet like this function does below. *I DO have a test that claims that my IR is broken if I set the call attributes. Curiously only 1 of my tests, but the error is: "cannot guarantee tail call due to mismatched ABI impacting function attributes" The only difference in attributes is target-features and target-cpu though, so I don't know what causes this. Repository: rC Clang https://reviews.llvm.org/D53586 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
erichkeane marked 2 inline comments as done. erichkeane added a comment. In https://reviews.llvm.org/D53586#1273546, @rnk wrote: > Seems reasonable. Should the resolver still be called `?foo@.resolver`, or > should it get a new name, since it is quite functionally different now? I'm not attached to the name. I didn't have a good alternative (I'd thought of 'dispatcher' at one point, but figured that was because my mind was attached to cpu-dispatch). Implementation wise, '.resolver' is slightly easier, but not enough to motivate the discussion. If you like '.dispatcher' instead of '.resolver', I can do that, otherwise a brainstorming session would be appreciated! Comment at: lib/CodeGen/CodeGenFunction.cpp:2391-2393 + llvm::SmallVector Args; + llvm::for_each(Resolver->args(), + [&](llvm::Argument &Arg) { Args.push_back(&Arg); }); rnk wrote: > Surely this would be simpler as a range for or `Args{Resolver->arg_begin() > ...};` Surely... Unfortunately the types don't match. Resolver Args are an array of Arguments (so the iterator is a Arg*). HOWEVER, "Args" here needs to be an array ref to Value*, so the iterator would have to be a Value** (not a Arg*/Value*). The point of this small vector is to create an array of _pointers_ from the array of Arguments. Comment at: lib/CodeGen/CodeGenFunction.cpp:2395 + + llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args); + rnk wrote: > This approach is... not going to work in the general case. Consider, for > example, varargs. Then consider 32-bit x86 inalloca, which is quite > widespread. Any non-trivially copyable object is not going to be passable > through such a thunk. You might consider looking at > CodeGenFunction::EmitMustTailThunk instead. Oh dear... I'm unfamiliar with EmitMustTailThunk, is it self explanatory? Any chance you can expound? Should I call/use that function, or copy out of it? Comment at: lib/CodeGen/CodeGenFunction.cpp:2432 llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver); llvm::IRBuilder<> RetBuilder(RetBlock); + CreateMultiVersionResolverReturn(Resolver, RetBuilder, RO.Function, rnk wrote: > You can make this a CGBuilderTy. Just pass it a type cache. I'd tried that a while and could never find a type cache to use. I'd not realized until just now (searching more for usages) that CodeGenFunction is one! Comment at: test/CodeGenCXX/attr-target-mv-overloads.cpp:74 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.resolver"(i32) comdat +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_ivybridge" rnk wrote: > Does dumpbin demangle these names correctly? Just curious. I don't know, I'd suspect not. However, demangler.com (not sure what it uses?) seems to demangle them ignoring everything after the '.'. Repository: rC Clang https://reviews.llvm.org/D53586 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
rnk added a comment. Seems reasonable. Should the resolver still be called `?foo@.resolver`, or should it get a new name, since it is quite functionally different now? Comment at: lib/CodeGen/CodeGenFunction.cpp:2381 + +template +static void CreateMultiVersionResolverReturn(llvm::Function *Resolver, Rather than templating over IRBuilder and CGBuilderTy, I'd standardize on CGBuilderTy. Comment at: lib/CodeGen/CodeGenFunction.cpp:2391-2393 + llvm::SmallVector Args; + llvm::for_each(Resolver->args(), + [&](llvm::Argument &Arg) { Args.push_back(&Arg); }); Surely this would be simpler as a range for or `Args{Resolver->arg_begin() ...};` Comment at: lib/CodeGen/CodeGenFunction.cpp:2395 + + llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args); + This approach is... not going to work in the general case. Consider, for example, varargs. Then consider 32-bit x86 inalloca, which is quite widespread. Any non-trivially copyable object is not going to be passable through such a thunk. You might consider looking at CodeGenFunction::EmitMustTailThunk instead. Comment at: lib/CodeGen/CodeGenFunction.cpp:2432 llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver); llvm::IRBuilder<> RetBuilder(RetBlock); + CreateMultiVersionResolverReturn(Resolver, RetBuilder, RO.Function, You can make this a CGBuilderTy. Just pass it a type cache. Comment at: test/CodeGenCXX/attr-target-mv-overloads.cpp:74 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.resolver"(i32) comdat +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_ivybridge" Does dumpbin demangle these names correctly? Just curious. Repository: rC Clang https://reviews.llvm.org/D53586 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.
erichkeane created this revision. erichkeane added reviewers: echristo, rnk, aaron.ballman. erichkeane added a subscriber: mibintc. Similar to how ICC handles CPU-Dispatch on Windows, this patch uses the resolver function directly to forward the call to the proper function. This is not nearly as efficient as IFuncs of course, but is still quite useful for large functions specifically developed for certain processors. This is unfortunately still limited to x86, since it depends on __builtin_cpu_supports and __builtin_cpu_is, which are x86 builtins. The naming for the resolver/forwarding function for cpu-dispatch was taken from ICC's implementation, which uses the unmodified name for this (no mangling additions). This is possible, since cpu-dispatch uses '.A' for the 'default' version. In 'target' multiversioning, this function keeps the '.resolver' extension in order to keep the default function keeping the default mangling. Repository: rC Clang https://reviews.llvm.org/D53586 Files: include/clang/AST/Decl.h include/clang/Basic/Attr.td include/clang/Basic/TargetInfo.h lib/AST/Decl.cpp lib/Basic/Targets/X86.h lib/CodeGen/CodeGenFunction.cpp lib/CodeGen/CodeGenModule.cpp lib/CodeGen/CodeGenModule.h test/CodeGen/attr-cpuspecific.c test/CodeGen/attr-target-mv-func-ptrs.c test/CodeGen/attr-target-mv-va-args.c test/CodeGen/attr-target-mv.c test/CodeGenCXX/attr-target-mv-diff-ns.cpp test/CodeGenCXX/attr-target-mv-func-ptrs.cpp test/CodeGenCXX/attr-target-mv-member-funcs.cpp test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp test/CodeGenCXX/attr-target-mv-overloads.cpp test/Sema/attr-target-mv-bad-target.c Index: test/Sema/attr-target-mv-bad-target.c === --- test/Sema/attr-target-mv-bad-target.c +++ test/Sema/attr-target-mv-bad-target.c @@ -1,4 +1,3 @@ -// RUN: %clang_cc1 -triple x86_64-windows-pc -fsyntax-only -verify %s // RUN: %clang_cc1 -triple arm-none-eabi -fsyntax-only -verify %s int __attribute__((target("sse4.2"))) redecl1(void) { return 1; } Index: test/CodeGenCXX/attr-target-mv-overloads.cpp === --- test/CodeGenCXX/attr-target-mv-overloads.cpp +++ test/CodeGenCXX/attr-target-mv-overloads.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; } int __attribute__((target("arch=sandybridge"))) foo_overload(int); @@ -13,38 +14,70 @@ return foo_overload() + foo_overload(1); } -// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver -// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver - - -// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32) -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadi(i32) -// CHECK: ret i32 2 -// CHECK: define i32 @_Z12foo_overloadv.sse4.2() -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge() -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadv() -// CHECK: ret i32 2 - -// CHECK: define i32 @_Z4bar2v() -// CHECK: call i32 @_Z12foo_overloadv.ifunc() -// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1) - -// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat -// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2 -// CHECK: ret i32 ()* @_Z12foo_overloadv - -// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2 -// CHECK: ret i32 (i32)* @_Z12foo_overloadi - -// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge() -// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32) +// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver +// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver + + +// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32) +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadi(i32) +// LINUX: ret i32 2 +// LINUX: define i32 @_Z12foo_overloadv.sse4.2() +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge() +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadv() +// LINUX: ret i32 2 + +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.s