erichkeane updated this revision to Diff 171088. erichkeane marked an inline comment as done. erichkeane added a comment.
Added test as requested by @rnk. How's it look? I hope I got the balance of check-lines right. https://reviews.llvm.org/D53586 Files: include/clang/AST/Decl.h include/clang/Basic/Attr.td include/clang/Basic/TargetInfo.h lib/AST/Decl.cpp lib/Basic/Targets/X86.h lib/CodeGen/CodeGenFunction.cpp lib/CodeGen/CodeGenFunction.h lib/CodeGen/CodeGenModule.cpp lib/CodeGen/CodeGenModule.h test/CodeGen/attr-cpuspecific.c test/CodeGen/attr-target-mv-func-ptrs.c test/CodeGen/attr-target-mv-va-args.c test/CodeGen/attr-target-mv.c test/CodeGenCXX/attr-target-mv-diff-ns.cpp test/CodeGenCXX/attr-target-mv-func-ptrs.cpp test/CodeGenCXX/attr-target-mv-member-funcs.cpp test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp test/CodeGenCXX/attr-target-mv-overloads.cpp test/Sema/attr-target-mv-bad-target.c
Index: test/Sema/attr-target-mv-bad-target.c =================================================================== --- test/Sema/attr-target-mv-bad-target.c +++ test/Sema/attr-target-mv-bad-target.c @@ -1,4 +1,3 @@ -// RUN: %clang_cc1 -triple x86_64-windows-pc -fsyntax-only -verify %s // RUN: %clang_cc1 -triple arm-none-eabi -fsyntax-only -verify %s int __attribute__((target("sse4.2"))) redecl1(void) { return 1; } Index: test/CodeGenCXX/attr-target-mv-overloads.cpp =================================================================== --- test/CodeGenCXX/attr-target-mv-overloads.cpp +++ test/CodeGenCXX/attr-target-mv-overloads.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; } int __attribute__((target("arch=sandybridge"))) foo_overload(int); @@ -13,38 +14,69 @@ return foo_overload() + foo_overload(1); } -// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver -// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver - - -// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32) -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadi(i32) -// CHECK: ret i32 2 -// CHECK: define i32 @_Z12foo_overloadv.sse4.2() -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge() -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadv() -// CHECK: ret i32 2 - -// CHECK: define i32 @_Z4bar2v() -// CHECK: call i32 @_Z12foo_overloadv.ifunc() -// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1) - -// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat -// 
CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2 -// CHECK: ret i32 ()* @_Z12foo_overloadv - -// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2 -// CHECK: ret i32 (i32)* @_Z12foo_overloadi - -// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge() -// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32) +// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver +// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver + +// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32) +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadi(i32) +// LINUX: ret i32 2 +// LINUX: define i32 @_Z12foo_overloadv.sse4.2() +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge() +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadv() +// LINUX: ret i32 2 + +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.sse4.2"(i32) +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"(i32) +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z"(i32) +// WINDOWS: ret i32 2 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.sse4.2"() +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.arch_ivybridge"() +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ"() +// WINDOWS: ret i32 2 + +// LINUX: define i32 @_Z4bar2v() +// LINUX: call i32 @_Z12foo_overloadv.ifunc() +// LINUX: call i32 @_Z12foo_overloadi.ifunc(i32 1) + +// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"() +// 
WINDOWS: call i32 @"?foo_overload@@YAHXZ.resolver"() +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.resolver"(i32 1) + +// LINUX: define i32 ()* @_Z12foo_overloadv.resolver() comdat +// LINUX: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge +// LINUX: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge +// LINUX: ret i32 ()* @_Z12foo_overloadv.sse4.2 +// LINUX: ret i32 ()* @_Z12foo_overloadv + +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.resolver"() comdat +// WINDOWS: call i32 @"?foo_overload@@YAHXZ.arch_sandybridge" +// WINDOWS: call i32 @"?foo_overload@@YAHXZ.arch_ivybridge" +// WINDOWS: call i32 @"?foo_overload@@YAHXZ.sse4.2" +// WINDOWS: call i32 @"?foo_overload@@YAHXZ" + +// LINUX: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat +// LINUX: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge +// LINUX: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge +// LINUX: ret i32 (i32)* @_Z12foo_overloadi.sse4.2 +// LINUX: ret i32 (i32)* @_Z12foo_overloadi + +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.resolver"(i32) comdat +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_ivybridge" +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.sse4.2" +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z" + +// LINUX: declare i32 @_Z12foo_overloadv.arch_sandybridge() +// LINUX: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32) + +// WINDOWS: declare dso_local i32 @"?foo_overload@@YAHXZ.arch_sandybridge"() +// WINDOWS: declare dso_local i32 @"?foo_overload@@YAHH@Z.arch_sandybridge"(i32) Index: test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp =================================================================== --- test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp +++ test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | 
FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS struct S { int __attribute__((target("sse4.2"))) foo(int); int __attribute__((target("arch=sandybridge"))) foo(int); @@ -15,25 +16,46 @@ return s.foo(0); } -// CHECK: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver +// LINUX: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver -// CHECK: define i32 @_ZN1S3fooEi(%struct.S* %this, i32) -// CHECK: ret i32 2 +// LINUX: define i32 @_ZN1S3fooEi(%struct.S* %this, i32) +// LINUX: ret i32 2 -// CHECK: define i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32) -// CHECK: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %this, i32) +// WINDOWS: ret i32 2 -// CHECK: define i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32) -// CHECK: ret i32 1 +// LINUX: define i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32) +// LINUX: ret i32 0 -// CHECK: define i32 @_Z3barv() -// CHECK: %s = alloca %struct.S, align 1 -// CHECK: %call = call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0) +// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %this, i32) +// WINDOWS: ret i32 0 -// CHECK: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2 -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi +// LINUX: define i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32) +// LINUX: ret i32 1 -// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32) +// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %this, i32) +// WINDOWS: ret i32 1 + +// LINUX: define i32 @_Z3barv() +// LINUX: %s = alloca %struct.S, align 1 +// LINUX: %call = call i32 
@_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0) + +// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"() +// WINDOWS: %s = alloca %struct.S, align 1 +// WINDOWS: %call = call i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S* %s, i32 0) + +// LINUX: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2 +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi + +// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S*, i32) comdat +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S* %0, i32 %1) +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %0, i32 %1) +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %0, i32 %1) +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %0, i32 %1) + +// LINUX: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32) + +// WINDOWS: declare dso_local i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S*, i32) Index: test/CodeGenCXX/attr-target-mv-member-funcs.cpp =================================================================== --- test/CodeGenCXX/attr-target-mv-member-funcs.cpp +++ test/CodeGenCXX/attr-target-mv-member-funcs.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS struct S { int __attribute__((target("sse4.2"))) foo(int) { return 0; } @@ -64,82 +65,156 @@ return a.foo(1) + b.foo(2); } -// CHECK: @_ZN1SaSERKS_.ifunc = ifunc %struct.S* (%struct.S*, %struct.S*), %struct.S* (%struct.S*, %struct.S*)* ()* @_ZN1SaSERKS_.resolver -// CHECK: @_ZNK9ConvertTocv1SEv.ifunc = ifunc void (%struct.ConvertTo*), void 
(%struct.ConvertTo*)* ()* @_ZNK9ConvertTocv1SEv.resolver -// CHECK: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver -// CHECK: @_ZN2S23fooEi.ifunc = ifunc i32 (%struct.S2*, i32), i32 (%struct.S2*, i32)* ()* @_ZN2S23fooEi.resolver +// LINUX: @_ZN1SaSERKS_.ifunc = ifunc %struct.S* (%struct.S*, %struct.S*), %struct.S* (%struct.S*, %struct.S*)* ()* @_ZN1SaSERKS_.resolver +// LINUX: @_ZNK9ConvertTocv1SEv.ifunc = ifunc void (%struct.ConvertTo*), void (%struct.ConvertTo*)* ()* @_ZNK9ConvertTocv1SEv.resolver +// LINUX: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver +// LINUX: @_ZN2S23fooEi.ifunc = ifunc i32 (%struct.S2*, i32), i32 (%struct.S2*, i32)* ()* @_ZN2S23fooEi.resolver // Templates: -// CHECK: @_ZN5templIiE3fooEi.ifunc = ifunc i32 (%struct.templ*, i32), i32 (%struct.templ*, i32)* ()* @_ZN5templIiE3fooEi.resolver -// CHECK: @_ZN5templIdE3fooEi.ifunc = ifunc i32 (%struct.templ.0*, i32), i32 (%struct.templ.0*, i32)* ()* @_ZN5templIdE3fooEi.resolver - -// CHECK: define i32 @_Z3barv() -// CHECK: %s = alloca %struct.S, align 1 -// CHECK: %s2 = alloca %struct.S, align 1 -// CHECK: %C = alloca %struct.ConvertTo, align 1 -// CHECK: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2 -// CHECK: call void @_ZNK9ConvertTocv1SEv.ifunc(%struct.ConvertTo* %C) -// CHECK: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2 -// CHECK: call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0) - -// CHECK: define %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.resolver() comdat -// CHECK: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.arch_ivybridge -// CHECK: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_ - -// CHECK: define void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.resolver() comdat -// CHECK: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.arch_ivybridge -// CHECK: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv - -// 
CHECK: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2 -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi - -// CHECK: define i32 @_Z4bar2v() -// CHECK:call i32 @_ZN2S23fooEi.ifunc -// define i32 (%struct.S2*, i32)* @_ZN2S23fooEi.resolver() comdat -// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_sandybridge -// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_ivybridge -// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.sse4.2 -// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi - -// CHECK: define i32 @_ZN2S23fooEi.sse4.2(%struct.S2* %this, i32) -// CHECK: define i32 @_ZN2S23fooEi.arch_ivybridge(%struct.S2* %this, i32) -// CHECK: define i32 @_ZN2S23fooEi(%struct.S2* %this, i32) - -// CHECK: define i32 @_Z9templ_usev() -// CHECK: call i32 @_ZN5templIiE3fooEi.ifunc -// CHECK: call i32 @_ZN5templIdE3fooEi.ifunc - -// CHECK: define i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.resolver() comdat -// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_sandybridge -// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_ivybridge -// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.sse4.2 -// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi - -// CHECK: define i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.resolver() comdat -// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_sandybridge -// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_ivybridge -// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.sse4.2 -// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi - -// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32) -// CHECK: ret i32 0 - -// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32) - -// CHECK: define 
linkonce_odr i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32) -// CHECK: ret i32 1 - -// CHECK: define linkonce_odr i32 @_ZN1S3fooEi(%struct.S* %this, i32) -// CHECK: ret i32 2 - -// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi.sse4.2 -// CHECK: declare i32 @_ZN5templIiE3fooEi.arch_sandybridge -// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi.arch_ivybridge -// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi - -// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi.sse4.2 -// CHECK: declare i32 @_ZN5templIdE3fooEi.arch_sandybridge -// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi.arch_ivybridge -// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi +// LINUX: @_ZN5templIiE3fooEi.ifunc = ifunc i32 (%struct.templ*, i32), i32 (%struct.templ*, i32)* ()* @_ZN5templIiE3fooEi.resolver +// LINUX: @_ZN5templIdE3fooEi.ifunc = ifunc i32 (%struct.templ.0*, i32), i32 (%struct.templ.0*, i32)* ()* @_ZN5templIdE3fooEi.resolver + +// LINUX: define i32 @_Z3barv() +// LINUX: %s = alloca %struct.S, align 1 +// LINUX: %s2 = alloca %struct.S, align 1 +// LINUX: %C = alloca %struct.ConvertTo, align 1 +// LINUX: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2 +// LINUX: call void @_ZNK9ConvertTocv1SEv.ifunc(%struct.ConvertTo* %C) +// LINUX: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2 +// LINUX: call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0) + +// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"() +// WINDOWS: %s = alloca %struct.S, align 1 +// WINDOWS: %s2 = alloca %struct.S, align 1 +// WINDOWS: %C = alloca %struct.ConvertTo, align 1 +// WINDOWS: call dereferenceable(1) %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S* %s2 +// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ.resolver"(%struct.ConvertTo* %C +// WINDOWS: call dereferenceable(1) %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S* %s2 +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S* %s, i32 0) + +// LINUX: define %struct.S* 
(%struct.S*, %struct.S*)* @_ZN1SaSERKS_.resolver() comdat +// LINUX: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.arch_ivybridge +// LINUX: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_ + +// WINDOWS: define dso_local %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S*, %struct.S*) +// WINDOWS: call %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.arch_ivybridge" +// WINDOWS: call %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z" + +// LINUX: define void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.resolver() comdat +// LINUX: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.arch_ivybridge +// LINUX: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv + +// WINDOWS: define dso_local void @"??BConvertTo@@QEBA?AUS@@XZ.resolver"(%struct.ConvertTo*, %struct.S*) +// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ.arch_ivybridge" +// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ" + +// LINUX: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2 +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi + +// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S*, i32) +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.sse4.2" +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z" + +// LINUX: define i32 @_Z4bar2v() +// LINUX: call i32 @_ZN2S23fooEi.ifunc + +// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"() +// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.resolver" + +// LINUX: define i32 (%struct.S2*, i32)* @_ZN2S23fooEi.resolver() comdat +// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_sandybridge +// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_ivybridge +// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.sse4.2 +// LINUX: ret i32 (%struct.S2*, i32)* 
@_ZN2S23fooEi + +// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.resolver"(%struct.S2*, i32) +// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.sse4.2" +// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z" + +// LINUX: define i32 @_ZN2S23fooEi.sse4.2(%struct.S2* %this, i32) +// LINUX: define i32 @_ZN2S23fooEi.arch_ivybridge(%struct.S2* %this, i32) +// LINUX: define i32 @_ZN2S23fooEi(%struct.S2* %this, i32) + +// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.sse4.2"(%struct.S2* %this, i32) +// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.arch_ivybridge"(%struct.S2* %this, i32) +// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z"(%struct.S2* %this, i32) + +// LINUX: define i32 @_Z9templ_usev() +// LINUX: call i32 @_ZN5templIiE3fooEi.ifunc +// LINUX: call i32 @_ZN5templIdE3fooEi.ifunc + +// WINDOWS: define dso_local i32 @"?templ_use@@YAHXZ"() +// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.resolver" +// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.resolver" + +// LINUX: define i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.resolver() comdat +// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_sandybridge +// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_ivybridge +// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.sse4.2 +// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi + +// WINDOWS: define dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.resolver"(%struct.templ*, i32) +// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.sse4.2" +// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z" + +// LINUX: define i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.resolver() comdat +// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_sandybridge +// LINUX: ret i32 (%struct.templ.0*, 
i32)* @_ZN5templIdE3fooEi.arch_ivybridge +// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.sse4.2 +// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi + +// WINDOWS: define dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.resolver"(%struct.templ.0*, i32) comdat +// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.sse4.2" +// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z" + +// LINUX: define linkonce_odr i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32) +// LINUX: ret i32 0 + +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %this, i32) +// WINDOWS: ret i32 0 + +// LINUX: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32) + +// WINDOWS: declare dso_local i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S*, i32) + +// LINUX: define linkonce_odr i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32) +// LINUX: ret i32 1 + +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %this, i32) +// WINDOWS: ret i32 1 + +// LINUX: define linkonce_odr i32 @_ZN1S3fooEi(%struct.S* %this, i32) +// LINUX: ret i32 2 + +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %this, i32) +// WINDOWS: ret i32 2 + +// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi.sse4.2 +// LINUX: declare i32 @_ZN5templIiE3fooEi.arch_sandybridge +// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi.arch_ivybridge +// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi + +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.sse4.2" +// WINDOWS: declare dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z" + +// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi.sse4.2 +// 
LINUX: declare i32 @_ZN5templIdE3fooEi.arch_sandybridge +// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi.arch_ivybridge +// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi + +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.sse4.2" +// WINDOWS: declare dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z" Index: test/CodeGenCXX/attr-target-mv-func-ptrs.cpp =================================================================== --- test/CodeGenCXX/attr-target-mv-func-ptrs.cpp +++ test/CodeGenCXX/attr-target-mv-func-ptrs.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS void temp(); void temp(int); using FP = void(*)(int); @@ -31,15 +32,23 @@ return Free(1) + (s.*Member)(2); } +// LINUX: @_Z3fooi.ifunc +// LINUX: @_ZN1S3fooEi.ifunc -// CHECK: @_Z3fooi.ifunc -// CHECK: @_ZN1S3fooEi.ifunc - -// CHECK: define i32 @_Z3barv() +// LINUX: define i32 @_Z3barv() // Store to Free of ifunc -// CHECK: store i32 (i32)* @_Z3fooi.ifunc +// LINUX: store i32 (i32)* @_Z3fooi.ifunc // Store to Member of ifunc -// CHECK: store { i64, i64 } { i64 ptrtoint (i32 (%struct.S*, i32)* @_ZN1S3fooEi.ifunc to i64), i64 0 }, { i64, i64 }* [[MEMBER:%[a-z]+]] +// LINUX: store { i64, i64 } { i64 ptrtoint (i32 (%struct.S*, i32)* @_ZN1S3fooEi.ifunc to i64), i64 0 }, { i64, i64 }* [[MEMBER:%[a-z]+]] // Call to 'f' with the ifunc -// CHECK: call void @_Z1fPFiiEM1SFiiE(i32 (i32)* @_Z3fooi.ifunc +// LINUX: call void @_Z1fPFiiEM1SFiiE(i32 (i32)* @_Z3fooi.ifunc + +// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"() +// Store to Free 
+// WINDOWS: store i32 (i32)* @"?foo@@YAHH@Z.resolver", i32 (i32)** +// Store to Member +// WINDOWS: store i8* bitcast (i32 (%struct.S*, i32)* @"?foo@S@@QEAAHH@Z.resolver" to i8*), i8** + +// Call to 'f' +// WINDOWS: call void @"?f@@YAXP6AHH@ZP8S@@EAAHH@Z@Z"(i32 (i32)* @"?foo@@YAHH@Z.resolver", i8* bitcast (i32 (%struct.S*, i32)* @"?foo@S@@QEAAHH@Z.resolver" to i8*)) Index: test/CodeGenCXX/attr-target-mv-diff-ns.cpp =================================================================== --- test/CodeGenCXX/attr-target-mv-diff-ns.cpp +++ test/CodeGenCXX/attr-target-mv-diff-ns.cpp @@ -1,5 +1,6 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s -// Test ensures that this properly differentiates between types in different +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS +// Test ensures that this properly differentiates between types in different // namespaces. 
int __attribute__((target("sse4.2"))) foo(int) { return 0; } int __attribute__((target("arch=sandybridge"))) foo(int); @@ -17,38 +18,71 @@ return foo(1) + ns::foo(2); } -// CHECK: @_Z3fooi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z3fooi.resolver -// CHECK: @_ZN2ns3fooEi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_ZN2ns3fooEi.resolver - -// CHECK: define i32 @_Z3fooi.sse4.2(i32) -// CHECK: ret i32 0 -// CHECK: define i32 @_Z3fooi.arch_ivybridge(i32) -// CHECK: ret i32 1 -// CHECK: define i32 @_Z3fooi(i32) -// CHECK: ret i32 2 - -// CHECK: define i32 @_ZN2ns3fooEi.sse4.2(i32) -// CHECK: ret i32 0 -// CHECK: define i32 @_ZN2ns3fooEi.arch_ivybridge(i32) -// CHECK: ret i32 1 -// CHECK: define i32 @_ZN2ns3fooEi(i32) -// CHECK: ret i32 2 - -// CHECK: define i32 @_Z3barv() -// CHECK: call i32 @_Z3fooi.ifunc(i32 1) -// CHECK: call i32 @_ZN2ns3fooEi.ifunc(i32 2) - -// CHECK: define i32 (i32)* @_Z3fooi.resolver() comdat -// CHECK: ret i32 (i32)* @_Z3fooi.arch_sandybridge -// CHECK: ret i32 (i32)* @_Z3fooi.arch_ivybridge -// CHECK: ret i32 (i32)* @_Z3fooi.sse4.2 -// CHECK: ret i32 (i32)* @_Z3fooi -// -// CHECK: define i32 (i32)* @_ZN2ns3fooEi.resolver() comdat -// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.arch_sandybridge -// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.arch_ivybridge -// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.sse4.2 -// CHECK: ret i32 (i32)* @_ZN2ns3fooEi - -// CHECK: declare i32 @_Z3fooi.arch_sandybridge(i32) -// CHECK: declare i32 @_ZN2ns3fooEi.arch_sandybridge(i32) +// LINUX: @_Z3fooi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z3fooi.resolver +// LINUX: @_ZN2ns3fooEi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_ZN2ns3fooEi.resolver + +// LINUX: define i32 @_Z3fooi.sse4.2(i32) +// LINUX: ret i32 0 +// LINUX: define i32 @_Z3fooi.arch_ivybridge(i32) +// LINUX: ret i32 1 +// LINUX: define i32 @_Z3fooi(i32) +// LINUX: ret i32 2 + +// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z.sse4.2"(i32) +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z.arch_ivybridge"(i32) 
+// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z"(i32) +// WINDOWS: ret i32 2 + +// LINUX: define i32 @_ZN2ns3fooEi.sse4.2(i32) +// LINUX: ret i32 0 +// LINUX: define i32 @_ZN2ns3fooEi.arch_ivybridge(i32) +// LINUX: ret i32 1 +// LINUX: define i32 @_ZN2ns3fooEi(i32) +// LINUX: ret i32 2 + +// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.sse4.2"(i32) +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.arch_ivybridge"(i32) +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z"(i32) +// WINDOWS: ret i32 2 + +// LINUX: define i32 @_Z3barv() +// LINUX: call i32 @_Z3fooi.ifunc(i32 1) +// LINUX: call i32 @_ZN2ns3fooEi.ifunc(i32 2) + +// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"() +// WINDOWS: call i32 @"?foo@@YAHH@Z.resolver"(i32 1) +// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.resolver"(i32 2) + +// LINUX: define i32 (i32)* @_Z3fooi.resolver() comdat +// LINUX: ret i32 (i32)* @_Z3fooi.arch_sandybridge +// LINUX: ret i32 (i32)* @_Z3fooi.arch_ivybridge +// LINUX: ret i32 (i32)* @_Z3fooi.sse4.2 +// LINUX: ret i32 (i32)* @_Z3fooi + +// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z.resolver"(i32) comdat +// WINDOWS: call i32 @"?foo@@YAHH@Z.arch_sandybridge"(i32 %0) +// WINDOWS: call i32 @"?foo@@YAHH@Z.arch_ivybridge"(i32 %0) +// WINDOWS: call i32 @"?foo@@YAHH@Z.sse4.2"(i32 %0) +// WINDOWS: call i32 @"?foo@@YAHH@Z"(i32 %0) + +// LINUX: define i32 (i32)* @_ZN2ns3fooEi.resolver() comdat +// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.arch_sandybridge +// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.arch_ivybridge +// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.sse4.2 +// LINUX: ret i32 (i32)* @_ZN2ns3fooEi + +// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.resolver"(i32) comdat +// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.arch_sandybridge"(i32 %0) +// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.arch_ivybridge"(i32 %0) +// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.sse4.2"(i32 %0) +// WINDOWS: call i32 @"?foo@ns@@YAHH@Z"(i32 %0) + +// LINUX: declare i32 
@_Z3fooi.arch_sandybridge(i32) +// LINUX: declare i32 @_ZN2ns3fooEi.arch_sandybridge(i32) + +// WINDOWS: declare dso_local i32 @"?foo@@YAHH@Z.arch_sandybridge"(i32) +// WINDOWS: declare dso_local i32 @"?foo@ns@@YAHH@Z.arch_sandybridge"(i32) Index: test/CodeGen/attr-target-mv.c =================================================================== --- test/CodeGen/attr-target-mv.c +++ test/CodeGen/attr-target-mv.c @@ -1,4 +1,6 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS + int __attribute__((target("sse4.2"))) foo(void) { return 0; } int __attribute__((target("arch=sandybridge"))) foo(void); int __attribute__((target("arch=ivybridge"))) foo(void) {return 1;} @@ -25,82 +27,155 @@ inline __attribute__((target("default"))) void foo_decls(void) {} inline __attribute__((target("sse4.2"))) void foo_decls(void) {} -inline __attribute__((target("default"))) void foo_multi(void) {} -inline __attribute__((target("avx,sse4.2"))) void foo_multi(void) {} -inline __attribute__((target("sse4.2,fma4"))) void foo_multi(void) {} -inline __attribute__((target("arch=ivybridge,fma4,sse4.2"))) void foo_multi(void) {} +inline __attribute__((target("default"))) void foo_multi(int i, double d) {} +inline __attribute__((target("avx,sse4.2"))) void foo_multi(int i, double d) {} +inline __attribute__((target("sse4.2,fma4"))) void foo_multi(int i, double d) {} +inline __attribute__((target("arch=ivybridge,fma4,sse4.2"))) void foo_multi(int i, double d) {} void bar4() { - foo_multi(); + foo_multi(1, 5.0); } -// CHECK: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver -// CHECK: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver -// CHECK: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver - -// CHECK: define i32 @foo.sse4.2() 
-// CHECK: ret i32 0 -// CHECK: define i32 @foo.arch_ivybridge() -// CHECK: ret i32 1 -// CHECK: define i32 @foo() -// CHECK: ret i32 2 -// CHECK: define i32 @bar() -// CHECK: call i32 @foo.ifunc() - -// CHECK: define i32 ()* @foo.resolver() comdat -// CHECK: call void @__cpu_indicator_init() -// CHECK: ret i32 ()* @foo.arch_sandybridge -// CHECK: ret i32 ()* @foo.arch_ivybridge -// CHECK: ret i32 ()* @foo.sse4.2 -// CHECK: ret i32 ()* @foo - -// CHECK: define i32 @bar2() -// CHECK: call i32 @foo_inline.ifunc() - -// CHECK: define i32 ()* @foo_inline.resolver() comdat -// CHECK: call void @__cpu_indicator_init() -// CHECK: ret i32 ()* @foo_inline.arch_sandybridge -// CHECK: ret i32 ()* @foo_inline.arch_ivybridge -// CHECK: ret i32 ()* @foo_inline.sse4.2 -// CHECK: ret i32 ()* @foo_inline - -// CHECK: define void @bar3() -// CHECK: call void @foo_decls.ifunc() - -// CHECK: define void ()* @foo_decls.resolver() comdat -// CHECK: ret void ()* @foo_decls.sse4.2 -// CHECK: ret void ()* @foo_decls - -// CHECK: define void @bar4() -// CHECK: call void @foo_multi.ifunc() - -// CHECK: define void ()* @foo_multi.resolver() comdat -// CHECK: and i32 %{{.*}}, 4352 -// CHECK: icmp eq i32 %{{.*}}, 4352 -// CHECK: ret void ()* @foo_multi.fma4_sse4.2 -// CHECK: icmp eq i32 %{{.*}}, 12 -// CHECK: and i32 %{{.*}}, 4352 -// CHECK: icmp eq i32 %{{.*}}, 4352 -// CHECK: ret void ()* @foo_multi.arch_ivybridge_fma4_sse4.2 -// CHECK: and i32 %{{.*}}, 768 -// CHECK: icmp eq i32 %{{.*}}, 768 -// CHECK: ret void ()* @foo_multi.avx_sse4.2 -// CHECK: ret void ()* @foo_multi - -// CHECK: declare i32 @foo.arch_sandybridge() - -// CHECK: define linkonce i32 @foo_inline.sse4.2() -// CHECK: ret i32 0 - -// CHECK: declare i32 @foo_inline.arch_sandybridge() -// -// CHECK: define linkonce i32 @foo_inline.arch_ivybridge() -// CHECK: ret i32 1 -// CHECK: define linkonce i32 @foo_inline() -// CHECK: ret i32 2 - -// CHECK: define linkonce void @foo_decls() -// CHECK: define linkonce void 
@foo_decls.sse4.2() - -// CHECK: define linkonce void @foo_multi.avx_sse4.2() -// CHECK: define linkonce void @foo_multi.fma4_sse4.2() -// CHECK: define linkonce void @foo_multi.arch_ivybridge_fma4_sse4.2() +// LINUX: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver +// LINUX: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver +// LINUX: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver +// LINUX: @foo_multi.ifunc = ifunc void (i32, double), void (i32, double)* ()* @foo_multi.resolver + +// LINUX: define i32 @foo.sse4.2() +// LINUX: ret i32 0 +// LINUX: define i32 @foo.arch_ivybridge() +// LINUX: ret i32 1 +// LINUX: define i32 @foo() +// LINUX: ret i32 2 +// LINUX: define i32 @bar() +// LINUX: call i32 @foo.ifunc() + +// WINDOWS: define dso_local i32 @foo.sse4.2() +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @foo.arch_ivybridge() +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @foo() +// WINDOWS: ret i32 2 +// WINDOWS: define dso_local i32 @bar() +// WINDOWS: call i32 @foo.resolver() + +// LINUX: define i32 ()* @foo.resolver() comdat +// LINUX: call void @__cpu_indicator_init() +// LINUX: ret i32 ()* @foo.arch_sandybridge +// LINUX: ret i32 ()* @foo.arch_ivybridge +// LINUX: ret i32 ()* @foo.sse4.2 +// LINUX: ret i32 ()* @foo + +// WINDOWS: define dso_local i32 @foo.resolver() comdat +// WINDOWS: call void @__cpu_indicator_init() +// WINDOWS: call i32 @foo.arch_sandybridge +// WINDOWS: call i32 @foo.arch_ivybridge +// WINDOWS: call i32 @foo.sse4.2 +// WINDOWS: call i32 @foo + +// LINUX: define i32 @bar2() +// LINUX: call i32 @foo_inline.ifunc() + +// WINDOWS: define dso_local i32 @bar2() +// WINDOWS: call i32 @foo_inline.resolver() + +// LINUX: define i32 ()* @foo_inline.resolver() comdat +// LINUX: call void @__cpu_indicator_init() +// LINUX: ret i32 ()* @foo_inline.arch_sandybridge +// LINUX: ret i32 ()* @foo_inline.arch_ivybridge +// LINUX: ret i32 ()* @foo_inline.sse4.2 +// LINUX: ret i32 ()* 
@foo_inline + +// WINDOWS: define dso_local i32 @foo_inline.resolver() comdat +// WINDOWS: call void @__cpu_indicator_init() +// WINDOWS: call i32 @foo_inline.arch_sandybridge +// WINDOWS: call i32 @foo_inline.arch_ivybridge +// WINDOWS: call i32 @foo_inline.sse4.2 +// WINDOWS: call i32 @foo_inline + +// LINUX: define void @bar3() +// LINUX: call void @foo_decls.ifunc() + +// WINDOWS: define dso_local void @bar3() +// WINDOWS: call void @foo_decls.resolver() + +// LINUX: define void ()* @foo_decls.resolver() comdat +// LINUX: ret void ()* @foo_decls.sse4.2 +// LINUX: ret void ()* @foo_decls + +// WINDOWS: define dso_local void @foo_decls.resolver() comdat +// WINDOWS: call void @foo_decls.sse4.2 +// WINDOWS: call void @foo_decls + +// LINUX: define void @bar4() +// LINUX: call void @foo_multi.ifunc(i32 1, double 5.{{[0+e]*}}) + +// WINDOWS: define dso_local void @bar4() +// WINDOWS: call void @foo_multi.resolver(i32 1, double 5.{{[0+e]*}}) + +// LINUX: define void (i32, double)* @foo_multi.resolver() comdat +// LINUX: and i32 %{{.*}}, 4352 +// LINUX: icmp eq i32 %{{.*}}, 4352 +// LINUX: ret void (i32, double)* @foo_multi.fma4_sse4.2 +// LINUX: icmp eq i32 %{{.*}}, 12 +// LINUX: and i32 %{{.*}}, 4352 +// LINUX: icmp eq i32 %{{.*}}, 4352 +// LINUX: ret void (i32, double)* @foo_multi.arch_ivybridge_fma4_sse4.2 +// LINUX: and i32 %{{.*}}, 768 +// LINUX: icmp eq i32 %{{.*}}, 768 +// LINUX: ret void (i32, double)* @foo_multi.avx_sse4.2 +// LINUX: ret void (i32, double)* @foo_multi + +// WINDOWS: define dso_local void @foo_multi.resolver(i32, double) comdat +// WINDOWS: and i32 %{{.*}}, 4352 +// WINDOWS: icmp eq i32 %{{.*}}, 4352 +// WINDOWS: call void @foo_multi.fma4_sse4.2(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: icmp eq i32 %{{.*}}, 12 +// WINDOWS: and i32 %{{.*}}, 4352 +// WINDOWS: icmp eq i32 %{{.*}}, 4352 +// WINDOWS: call void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: and i32 %{{.*}}, 768 +// 
WINDOWS: icmp eq i32 %{{.*}}, 768 +// WINDOWS: call void @foo_multi.avx_sse4.2(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: call void @foo_multi(i32 %0, double %1) +// WINDOWS-NEXT: ret void + +// LINUX: declare i32 @foo.arch_sandybridge() + +// WINDOWS: declare dso_local i32 @foo.arch_sandybridge() + +// LINUX: define linkonce i32 @foo_inline.sse4.2() +// LINUX: ret i32 0 + +// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.sse4.2() +// WINDOWS: ret i32 0 + +// LINUX: declare i32 @foo_inline.arch_sandybridge() + +// WINDOWS: declare dso_local i32 @foo_inline.arch_sandybridge() + +// LINUX: define linkonce i32 @foo_inline.arch_ivybridge() +// LINUX: ret i32 1 +// LINUX: define linkonce i32 @foo_inline() +// LINUX: ret i32 2 + +// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.arch_ivybridge() +// WINDOWS: ret i32 1 +// WINDOWS: define linkonce_odr dso_local i32 @foo_inline() +// WINDOWS: ret i32 2 + +// LINUX: define linkonce void @foo_decls() +// LINUX: define linkonce void @foo_decls.sse4.2() + +// WINDOWS: define linkonce_odr dso_local void @foo_decls() +// WINDOWS: define linkonce_odr dso_local void @foo_decls.sse4.2() + +// LINUX: define linkonce void @foo_multi.avx_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}}) +// LINUX: define linkonce void @foo_multi.fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}}) +// LINUX: define linkonce void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}}) + +// WINDOWS: define linkonce_odr dso_local void @foo_multi.avx_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}}) +// WINDOWS: define linkonce_odr dso_local void @foo_multi.fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}}) +// WINDOWS: define linkonce_odr dso_local void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}}) Index: test/CodeGen/attr-target-mv-va-args.c =================================================================== --- test/CodeGen/attr-target-mv-va-args.c +++ test/CodeGen/attr-target-mv-va-args.c @@ 
-1,4 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS int __attribute__((target("sse4.2"))) foo(int i, ...) { return 0; } int __attribute__((target("arch=sandybridge"))) foo(int i, ...); int __attribute__((target("arch=ivybridge"))) foo(int i, ...) {return 1;} @@ -8,19 +9,37 @@ return foo(1, 'a', 1.1) + foo(2, 2.2, "asdf"); } -// CHECK: @foo.ifunc = ifunc i32 (i32, ...), i32 (i32, ...)* ()* @foo.resolver -// CHECK: define i32 @foo.sse4.2(i32 %i, ...) -// CHECK: ret i32 0 -// CHECK: define i32 @foo.arch_ivybridge(i32 %i, ...) -// CHECK: ret i32 1 -// CHECK: define i32 @foo(i32 %i, ...) -// CHECK: ret i32 2 -// CHECK: define i32 @bar() -// CHECK: call i32 (i32, ...) @foo.ifunc(i32 1, i32 97, double -// CHECK: call i32 (i32, ...) @foo.ifunc(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds -// CHECK: define i32 (i32, ...)* @foo.resolver() comdat -// CHECK: ret i32 (i32, ...)* @foo.arch_sandybridge -// CHECK: ret i32 (i32, ...)* @foo.arch_ivybridge -// CHECK: ret i32 (i32, ...)* @foo.sse4.2 -// CHECK: ret i32 (i32, ...)* @foo -// CHECK: declare i32 @foo.arch_sandybridge(i32, ...) +// LINUX: @foo.ifunc = ifunc i32 (i32, ...), i32 (i32, ...)* ()* @foo.resolver +// LINUX: define i32 @foo.sse4.2(i32 %i, ...) +// LINUX: ret i32 0 +// LINUX: define i32 @foo.arch_ivybridge(i32 %i, ...) +// LINUX: ret i32 1 +// LINUX: define i32 @foo(i32 %i, ...) +// LINUX: ret i32 2 +// LINUX: define i32 @bar() +// LINUX: call i32 (i32, ...) @foo.ifunc(i32 1, i32 97, double +// LINUX: call i32 (i32, ...) 
@foo.ifunc(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds + +// LINUX: define i32 (i32, ...)* @foo.resolver() comdat +// LINUX: ret i32 (i32, ...)* @foo.arch_sandybridge +// LINUX: ret i32 (i32, ...)* @foo.arch_ivybridge +// LINUX: ret i32 (i32, ...)* @foo.sse4.2 +// LINUX: ret i32 (i32, ...)* @foo +// LINUX: declare i32 @foo.arch_sandybridge(i32, ...) + +// WINDOWS: define dso_local i32 @foo.sse4.2(i32 %i, ...) +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @foo.arch_ivybridge(i32 %i, ...) +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @foo(i32 %i, ...) +// WINDOWS: ret i32 2 +// WINDOWS: define dso_local i32 @bar() +// WINDOWS: call i32 (i32, ...) @foo.resolver(i32 1, i32 97, double +// WINDOWS: call i32 (i32, ...) @foo.resolver(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds + +// WINDOWS: define dso_local i32 @foo.resolver(i32, ...) comdat +// WINDOWS: call i32 (i32, ...) @foo.arch_sandybridge +// WINDOWS: call i32 (i32, ...) @foo.arch_ivybridge +// WINDOWS: call i32 (i32, ...) @foo.sse4.2 +// WINDOWS: call i32 (i32, ...) @foo +// WINDOWS: declare dso_local i32 @foo.arch_sandybridge(i32, ...) 
Index: test/CodeGen/attr-target-mv-func-ptrs.c =================================================================== --- test/CodeGen/attr-target-mv-func-ptrs.c +++ test/CodeGen/attr-target-mv-func-ptrs.c @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS int __attribute__((target("sse4.2"))) foo(int i) { return 0; } int __attribute__((target("arch=sandybridge"))) foo(int); int __attribute__((target("arch=ivybridge"))) foo(int i) {return 1;} @@ -16,17 +17,31 @@ return Free(1) + Free(2); } -// CHECK: @foo.ifunc = ifunc i32 (i32), i32 (i32)* ()* @foo.resolver -// CHECK: define i32 @foo.sse4.2( -// CHECK: ret i32 0 -// CHECK: define i32 @foo.arch_ivybridge( -// CHECK: ret i32 1 -// CHECK: define i32 @foo( -// CHECK: ret i32 2 +// LINUX: @foo.ifunc = ifunc i32 (i32), i32 (i32)* ()* @foo.resolver +// LINUX: define i32 @foo.sse4.2( +// LINUX: ret i32 0 +// LINUX: define i32 @foo.arch_ivybridge( +// LINUX: ret i32 1 +// LINUX: define i32 @foo( +// LINUX: ret i32 2 -// CHECK: define i32 @bar() -// CHECK: call void @func(i32 (i32)* @foo.ifunc) -// CHECK: store i32 (i32)* @foo.ifunc -// CHECK: store i32 (i32)* @foo.ifunc +// WINDOWS: define dso_local i32 @foo.sse4.2( +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @foo.arch_ivybridge( +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @foo( +// WINDOWS: ret i32 2 -// CHECK: declare i32 @foo.arch_sandybridge( +// LINUX: define i32 @bar() +// LINUX: call void @func(i32 (i32)* @foo.ifunc) +// LINUX: store i32 (i32)* @foo.ifunc +// LINUX: store i32 (i32)* @foo.ifunc + +// WINDOWS: define dso_local i32 @bar() +// WINDOWS: call void @func(i32 (i32)* @foo.resolver) +// WINDOWS: store i32 (i32)* @foo.resolver +// WINDOWS: store i32 (i32)* @foo.resolver + +// LINUX: declare i32 
@foo.arch_sandybridge( + +// WINDOWS: declare dso_local i32 @foo.arch_sandybridge( Index: test/CodeGen/attr-cpuspecific.c =================================================================== --- test/CodeGen/attr-cpuspecific.c +++ test/CodeGen/attr-cpuspecific.c @@ -1,100 +1,209 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LINUX +// RUN: %clang_cc1 -triple x86_64-windows-pc -fms-compatibility -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WINDOWS +#ifdef _WIN64 +#define ATTR(X) __declspec(X) +#else +#define ATTR(X) __attribute__((X)) +#endif // _WIN64 // Each called version should have an IFunc. -// CHECK: @SingleVersion.ifunc = ifunc void (), void ()* ()* @SingleVersion.resolver -// CHECK: @TwoVersions.ifunc = ifunc void (), void ()* ()* @TwoVersions.resolver -// CHECK: @TwoVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @TwoVersionsSameAttr.resolver -// CHECK: @ThreeVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @ThreeVersionsSameAttr.resolver +// LINUX: @SingleVersion.ifunc = ifunc void (), void ()* ()* @SingleVersion.resolver +// LINUX: @TwoVersions.ifunc = ifunc void (), void ()* ()* @TwoVersions.resolver +// LINUX: @TwoVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @TwoVersionsSameAttr.resolver +// LINUX: @ThreeVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @ThreeVersionsSameAttr.resolver -__attribute__((cpu_specific(ivybridge))) +ATTR(cpu_specific(ivybridge)) void SingleVersion(void){} -// CHECK: define void @SingleVersion.S() #[[S:[0-9]+]] +// LINUX: define void @SingleVersion.S() #[[S:[0-9]+]] +// WINDOWS: define dso_local void @SingleVersion.S() #[[S:[0-9]+]] -__attribute__((cpu_specific(ivybridge))) +ATTR(cpu_specific(ivybridge)) void NotCalled(void){} -// CHECK: define void @NotCalled.S() #[[S]] +// LINUX: define void @NotCalled.S() #[[S]] +// WINDOWS: define dso_local void 
@NotCalled.S() #[[S:[0-9]+]] // Done before any of the implementations. -__attribute__((cpu_dispatch(ivybridge, knl))) +ATTR(cpu_dispatch(ivybridge, knl)) void TwoVersions(void); -// CHECK: define void ()* @TwoVersions.resolver() -// CHECK: call void @__cpu_indicator_init -// CHECK: ret void ()* @TwoVersions.Z -// CHECK: ret void ()* @TwoVersions.S -// CHECK: call void @llvm.trap -// CHECK: unreachable - -__attribute__((cpu_specific(ivybridge))) +// LINUX: define void ()* @TwoVersions.resolver() +// LINUX: call void @__cpu_indicator_init +// LINUX: ret void ()* @TwoVersions.Z +// LINUX: ret void ()* @TwoVersions.S +// LINUX: call void @llvm.trap +// LINUX: unreachable + +// WINDOWS: define dso_local void @TwoVersions() +// WINDOWS: call void @__cpu_indicator_init() +// WINDOWS: call void @TwoVersions.Z() +// WINDOWS-NEXT: ret void +// WINDOWS: call void @TwoVersions.S() +// WINDOWS-NEXT: ret void +// WINDOWS: call void @llvm.trap +// WINDOWS: unreachable + +ATTR(cpu_specific(ivybridge)) void TwoVersions(void){} -// CHECK: define void @TwoVersions.S() #[[S]] +// CHECK: define {{.*}}void @TwoVersions.S() #[[S]] -__attribute__((cpu_specific(knl))) +ATTR(cpu_specific(knl)) void TwoVersions(void){} -// CHECK: define void @TwoVersions.Z() #[[K:[0-9]+]] +// CHECK: define {{.*}}void @TwoVersions.Z() #[[K:[0-9]+]] -__attribute__((cpu_specific(ivybridge, knl))) +ATTR(cpu_specific(ivybridge, knl)) void TwoVersionsSameAttr(void){} -// CHECK: define void @TwoVersionsSameAttr.S() #[[S]] -// CHECK: define void @TwoVersionsSameAttr.Z() #[[K]] +// CHECK: define {{.*}}void @TwoVersionsSameAttr.S() #[[S]] +// CHECK: define {{.*}}void @TwoVersionsSameAttr.Z() #[[K]] -__attribute__((cpu_specific(atom, ivybridge, knl))) +ATTR(cpu_specific(atom, ivybridge, knl)) void ThreeVersionsSameAttr(void){} -// CHECK: define void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]] -// CHECK: define void @ThreeVersionsSameAttr.S() #[[S]] -// CHECK: define void @ThreeVersionsSameAttr.Z() #[[K]] +// CHECK: 
define {{.*}}void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]] +// CHECK: define {{.*}}void @ThreeVersionsSameAttr.S() #[[S]] +// CHECK: define {{.*}}void @ThreeVersionsSameAttr.Z() #[[K]] void usages() { SingleVersion(); - // CHECK: @SingleVersion.ifunc() + // LINUX: @SingleVersion.ifunc() + // WINDOWS: @SingleVersion() TwoVersions(); - // CHECK: @TwoVersions.ifunc() + // LINUX: @TwoVersions.ifunc() + // WINDOWS: @TwoVersions() TwoVersionsSameAttr(); - // CHECK: @TwoVersionsSameAttr.ifunc() + // LINUX: @TwoVersionsSameAttr.ifunc() + // WINDOWS: @TwoVersionsSameAttr() ThreeVersionsSameAttr(); - // CHECK: @ThreeVersionsSameAttr.ifunc() + // LINUX: @ThreeVersionsSameAttr.ifunc() + // WINDOWS: @ThreeVersionsSameAttr() } // has an extra config to emit! -__attribute__((cpu_dispatch(ivybridge, knl, atom))) +ATTR(cpu_dispatch(ivybridge, knl, atom)) void TwoVersionsSameAttr(void); -// CHECK: define void ()* @TwoVersionsSameAttr.resolver() -// CHECK: ret void ()* @TwoVersionsSameAttr.Z -// CHECK: ret void ()* @TwoVersionsSameAttr.S -// CHECK: ret void ()* @TwoVersionsSameAttr.O -// CHECK: call void @llvm.trap -// CHECK: unreachable - -__attribute__((cpu_dispatch(atom, ivybridge, knl))) +// LINUX: define void ()* @TwoVersionsSameAttr.resolver() +// LINUX: ret void ()* @TwoVersionsSameAttr.Z +// LINUX: ret void ()* @TwoVersionsSameAttr.S +// LINUX: ret void ()* @TwoVersionsSameAttr.O +// LINUX: call void @llvm.trap +// LINUX: unreachable + +// WINDOWS: define dso_local void @TwoVersionsSameAttr() +// WINDOWS: call void @TwoVersionsSameAttr.Z +// WINDOWS-NEXT: ret void +// WINDOWS: call void @TwoVersionsSameAttr.S +// WINDOWS-NEXT: ret void +// WINDOWS: call void @TwoVersionsSameAttr.O +// WINDOWS-NEXT: ret void +// WINDOWS: call void @llvm.trap +// WINDOWS: unreachable + +ATTR(cpu_dispatch(atom, ivybridge, knl)) void ThreeVersionsSameAttr(void){} -// CHECK: define void ()* @ThreeVersionsSameAttr.resolver() -// CHECK: call void @__cpu_indicator_init -// CHECK: ret void ()* 
@ThreeVersionsSameAttr.Z -// CHECK: ret void ()* @ThreeVersionsSameAttr.S -// CHECK: ret void ()* @ThreeVersionsSameAttr.O -// CHECK: call void @llvm.trap -// CHECK: unreachable +// LINUX: define void ()* @ThreeVersionsSameAttr.resolver() +// LINUX: call void @__cpu_indicator_init +// LINUX: ret void ()* @ThreeVersionsSameAttr.Z +// LINUX: ret void ()* @ThreeVersionsSameAttr.S +// LINUX: ret void ()* @ThreeVersionsSameAttr.O +// LINUX: call void @llvm.trap +// LINUX: unreachable + +// WINDOWS: define dso_local void @ThreeVersionsSameAttr() +// WINDOWS: call void @__cpu_indicator_init +// WINDOWS: call void @ThreeVersionsSameAttr.Z +// WINDOWS-NEXT: ret void +// WINDOWS: call void @ThreeVersionsSameAttr.S +// WINDOWS-NEXT: ret void +// WINDOWS: call void @ThreeVersionsSameAttr.O +// WINDOWS-NEXT: ret void +// WINDOWS: call void @llvm.trap +// WINDOWS: unreachable // No Cpu Specific options. -__attribute__((cpu_dispatch(atom, ivybridge, knl))) +ATTR(cpu_dispatch(atom, ivybridge, knl)) void NoSpecifics(void); -// CHECK: define void ()* @NoSpecifics.resolver() -// CHECK: call void @__cpu_indicator_init -// CHECK: ret void ()* @NoSpecifics.Z -// CHECK: ret void ()* @NoSpecifics.S -// CHECK: ret void ()* @NoSpecifics.O -// CHECK: call void @llvm.trap -// CHECK: unreachable - -__attribute__((cpu_dispatch(atom, generic, ivybridge, knl))) +// LINUX: define void ()* @NoSpecifics.resolver() +// LINUX: call void @__cpu_indicator_init +// LINUX: ret void ()* @NoSpecifics.Z +// LINUX: ret void ()* @NoSpecifics.S +// LINUX: ret void ()* @NoSpecifics.O +// LINUX: call void @llvm.trap +// LINUX: unreachable + +// WINDOWS: define dso_local void @NoSpecifics() +// WINDOWS: call void @__cpu_indicator_init +// WINDOWS: call void @NoSpecifics.Z +// WINDOWS-NEXT: ret void +// WINDOWS: call void @NoSpecifics.S +// WINDOWS-NEXT: ret void +// WINDOWS: call void @NoSpecifics.O +// WINDOWS-NEXT: ret void +// WINDOWS: call void @llvm.trap +// WINDOWS: unreachable + +ATTR(cpu_dispatch(atom, 
generic, ivybridge, knl)) void HasGeneric(void); -// CHECK: define void ()* @HasGeneric.resolver() -// CHECK: call void @__cpu_indicator_init -// CHECK: ret void ()* @HasGeneric.Z -// CHECK: ret void ()* @HasGeneric.S -// CHECK: ret void ()* @HasGeneric.O -// CHECK: ret void ()* @HasGeneric.A -// CHECK-NOT: call void @llvm.trap +// LINUX: define void ()* @HasGeneric.resolver() +// LINUX: call void @__cpu_indicator_init +// LINUX: ret void ()* @HasGeneric.Z +// LINUX: ret void ()* @HasGeneric.S +// LINUX: ret void ()* @HasGeneric.O +// LINUX: ret void ()* @HasGeneric.A +// LINUX-NOT: call void @llvm.trap + +// WINDOWS: define dso_local void @HasGeneric() +// WINDOWS: call void @__cpu_indicator_init +// WINDOWS: call void @HasGeneric.Z +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasGeneric.S +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasGeneric.O +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasGeneric.A +// WINDOWS-NEXT: ret void +// WINDOWS-NOT: call void @llvm.trap + +ATTR(cpu_dispatch(atom, generic, ivybridge, knl)) +void HasParams(int i, double d); +// LINUX: define void (i32, double)* @HasParams.resolver() +// LINUX: call void @__cpu_indicator_init +// LINUX: ret void (i32, double)* @HasParams.Z +// LINUX: ret void (i32, double)* @HasParams.S +// LINUX: ret void (i32, double)* @HasParams.O +// LINUX: ret void (i32, double)* @HasParams.A +// LINUX-NOT: call void @llvm.trap + +// WINDOWS: define dso_local void @HasParams(i32, double) +// WINDOWS: call void @__cpu_indicator_init +// WINDOWS: call void @HasParams.Z(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasParams.S(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasParams.O(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasParams.A(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS-NOT: call void @llvm.trap + +ATTR(cpu_dispatch(atom, generic, ivybridge, knl)) +int HasParamsAndReturn(int i, double d); +// LINUX: 
define i32 (i32, double)* @HasParamsAndReturn.resolver() +// LINUX: call void @__cpu_indicator_init +// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.Z +// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.S +// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.O +// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.A +// LINUX-NOT: call void @llvm.trap + +// WINDOWS: define dso_local i32 @HasParamsAndReturn(i32, double) +// WINDOWS: call void @__cpu_indicator_init +// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.Z(i32 %0, double %1) +// WINDOWS-NEXT: ret i32 %[[RET]] +// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.S(i32 %0, double %1) +// WINDOWS-NEXT: ret i32 %[[RET]] +// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.O(i32 %0, double %1) +// WINDOWS-NEXT: ret i32 %[[RET]] +// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.A(i32 %0, double %1) +// WINDOWS-NEXT: ret i32 %[[RET]] +// WINDOWS-NOT: call void @llvm.trap // CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" // CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -1293,9 +1293,9 @@ llvm::AttributeList ExtraAttrs = llvm::AttributeList(), ForDefinition_t IsForDefinition = NotForDefinition); - llvm::Constant *GetOrCreateMultiVersionIFunc(GlobalDecl GD, - llvm::Type *DeclTy, - const FunctionDecl *FD); + llvm::Constant *GetOrCreateMultiVersionResolver(GlobalDecl GD, + llvm::Type *DeclTy, + const FunctionDecl *FD); void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD); llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, 
Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -892,10 +892,11 @@ static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM, const CPUSpecificAttr *Attr, raw_ostream &Out) { - // cpu_specific gets the current name, dispatch gets the resolver. + // cpu_specific gets the current name, dispatch gets the resolver if IFunc is + // supported. if (Attr) Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName()); - else + else if (CGM.getTarget().supportsIFunc()) Out << ".resolver"; } @@ -2507,13 +2508,19 @@ TA->getArchitecture(), Feats); }); - llvm::Function *ResolverFunc = cast<llvm::Function>( - GetGlobalValue((getMangledName(GD) + ".resolver").str())); + llvm::Function *ResolverFunc; + const TargetInfo &TI = getTarget(); + + if (TI.supportsIFunc() || FD->isTargetMultiVersion()) + ResolverFunc = cast<llvm::Function>( + GetGlobalValue((getMangledName(GD) + ".resolver").str())); + else + ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD))); + if (supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); - const TargetInfo &TI = getTarget(); std::stable_sort( Options.begin(), Options.end(), [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, @@ -2533,13 +2540,21 @@ llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType()); StringRef ResolverName = getMangledName(GD); - llvm::Type *ResolverType = llvm::FunctionType::get( - llvm::PointerType::get(DeclTy, - Context.getTargetAddressSpace(FD->getType())), - false); - auto *ResolverFunc = cast<llvm::Function>( - GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, - /*ForVTable=*/false)); + + llvm::Type *ResolverType; + GlobalDecl ResolverGD; + if (getTarget().supportsIFunc()) + ResolverType = llvm::FunctionType::get( + llvm::PointerType::get(DeclTy, + 
Context.getTargetAddressSpace(FD->getType())), + false); + else { + ResolverType = DeclTy; + ResolverGD = GD; + } + + auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction( + ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false)); SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options; const TargetInfo &Target = getTarget(); @@ -2571,37 +2586,53 @@ CGF.EmitMultiVersionResolver(ResolverFunc, Options); } -/// If an ifunc for the specified mangled name is not in the module, create and -/// return an llvm IFunc Function with the specified type. -llvm::Constant * -CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy, - const FunctionDecl *FD) { +/// If a dispatcher for the specified mangled name is not in the module, create +/// and return an llvm Function with the specified type. +llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver( + GlobalDecl GD, llvm::Type *DeclTy, const FunctionDecl *FD) { std::string MangledName = getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); - std::string IFuncName = MangledName + ".ifunc"; - if (llvm::GlobalValue *IFuncGV = GetGlobalValue(IFuncName)) - return IFuncGV; + + // Holds the name of the resolver, in ifunc mode this is the ifunc (which has + // a separate resolver). + std::string ResolverName = MangledName; + if (getTarget().supportsIFunc()) + ResolverName += ".ifunc"; + else if (FD->isTargetMultiVersion()) + ResolverName += ".resolver"; + + // If this already exists, just return that one. + if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName)) + return ResolverGV; // Since this is the first time we've created this IFunc, make sure // that we put this multiversioned function into the list to be // replaced later if necessary (target multiversioning only). 
if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion()) MultiVersionFuncs.push_back(GD); - std::string ResolverName = MangledName + ".resolver"; - llvm::Type *ResolverType = llvm::FunctionType::get( - llvm::PointerType::get(DeclTy, - Context.getTargetAddressSpace(FD->getType())), - false); - llvm::Constant *Resolver = - GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, - /*ForVTable=*/false); - llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( - DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); - GIF->setName(IFuncName); - SetCommonAttributes(FD, GIF); + if (getTarget().supportsIFunc()) { + llvm::Type *ResolverType = llvm::FunctionType::get( + llvm::PointerType::get( + DeclTy, getContext().getTargetAddressSpace(FD->getType())), + false); + llvm::Constant *Resolver = GetOrCreateLLVMFunction( + MangledName + ".resolver", ResolverType, GlobalDecl{}, + /*ForVTable=*/false); + llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( + DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); + GIF->setName(ResolverName); + SetCommonAttributes(FD, GIF); + + return GIF; + } - return GIF; + llvm::Constant *Resolver = GetOrCreateLLVMFunction( + ResolverName, DeclTy, GlobalDecl{}, /*ForVTable=*/false); + assert(isa<llvm::GlobalValue>(Resolver) && + "Resolver should be created for the first time"); + SetCommonAttributes(FD, cast<llvm::GlobalValue>(Resolver)); + return Resolver; } /// GetOrCreateLLVMFunction - If the specified mangled name is not in the @@ -2641,7 +2672,7 @@ if (TA && TA->isDefaultVersion()) UpdateMultiVersionNames(GD, FD); if (!IsForDefinition) - return GetOrCreateMultiVersionIFunc(GD, Ty, FD); + return GetOrCreateMultiVersionResolver(GD, Ty, FD); } } Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -4274,6 +4274,7 @@ struct MultiVersionResolverOption { 
llvm::Function *Function; + FunctionDecl *FD; struct Conds { StringRef Architecture; llvm::SmallVector<StringRef, 8> Features; Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -2392,13 +2392,39 @@ return Condition; } +static void CreateMultiVersionResolverReturn(CodeGenModule &CGM, + llvm::Function *Resolver, + CGBuilderTy &Builder, + llvm::Function *FuncToReturn, + bool SupportsIFunc) { + if (SupportsIFunc) { + Builder.CreateRet(FuncToReturn); + return; + } + + llvm::SmallVector<llvm::Value *, 10> Args; + llvm::for_each(Resolver->args(), + [&](llvm::Argument &Arg) { Args.push_back(&Arg); }); + + llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args); + Result->setTailCallKind(llvm::CallInst::TCK_MustTail); + + if (Resolver->getReturnType()->isVoidTy()) + Builder.CreateRetVoid(); + else + Builder.CreateRet(Result); +} + void CodeGenFunction::EmitMultiVersionResolver( llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) { assert((getContext().getTargetInfo().getTriple().getArch() == llvm::Triple::x86 || getContext().getTargetInfo().getTriple().getArch() == llvm::Triple::x86_64) && "Only implemented for x86 targets"); + + bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc(); + // Main function's basic block. 
llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver); Builder.SetInsertPoint(CurBlock); @@ -2412,13 +2438,15 @@ if (!Condition) { assert(&RO == Options.end() - 1 && "Default or Generic case must be last"); - Builder.CreateRet(RO.Function); + CreateMultiVersionResolverReturn(CGM, Resolver, Builder, RO.Function, + SupportsIFunc); return; } llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver); - llvm::IRBuilder<> RetBuilder(RetBlock); - RetBuilder.CreateRet(RO.Function); + CGBuilderTy RetBuilder(*this, RetBlock); + CreateMultiVersionResolverReturn(CGM, Resolver, RetBuilder, RO.Function, + SupportsIFunc); CurBlock = createBasicBlock("resolver_else", Resolver); Builder.CreateCondBr(Condition, RetBlock, CurBlock); } Index: lib/Basic/Targets/X86.h =================================================================== --- lib/Basic/Targets/X86.h +++ lib/Basic/Targets/X86.h @@ -290,9 +290,6 @@ return checkCPUKind(CPU = getCPUKind(Name)); } - bool supportsMultiVersioning() const override { - return getTriple().isOSBinFormatELF(); - } unsigned multiVersionSortPriority(StringRef Name) const override; bool setFPMath(StringRef Name) override; Index: lib/AST/Decl.cpp =================================================================== --- lib/AST/Decl.cpp +++ lib/AST/Decl.cpp @@ -2947,6 +2947,10 @@ return isMultiVersion() && hasAttr<CPUSpecificAttr>(); } +bool FunctionDecl::isTargetMultiVersion() const { + return isMultiVersion() && hasAttr<TargetAttr>(); +} + void FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) { redeclarable_base::setPreviousDecl(PrevDecl); Index: include/clang/Basic/TargetInfo.h =================================================================== --- include/clang/Basic/TargetInfo.h +++ include/clang/Basic/TargetInfo.h @@ -1082,9 +1082,15 @@ return false; } - /// Identify whether this taret supports multiversioning of functions, + /// Identify whether this target supports multiversioning of functions, /// 
which requires support for cpu_supports and cpu_is functionality. - virtual bool supportsMultiVersioning() const { return false; } + bool supportsMultiVersioning() const { + return getTriple().getArch() == llvm::Triple::x86 || + getTriple().getArch() == llvm::Triple::x86_64; + } + + /// Identify whether this target supports IFuncs. + bool supportsIFunc() const { return getTriple().isOSBinFormatELF(); } // Validate the contents of the __builtin_cpu_supports(const char*) // argument. Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -858,7 +858,7 @@ } def CPUSpecific : InheritableAttr { - let Spellings = [Clang<"cpu_specific">]; + let Spellings = [Clang<"cpu_specific">, Declspec<"cpu_specific">]; let Args = [VariadicIdentifierArgument<"Cpus">]; let Subjects = SubjectList<[Function]>; let Documentation = [CPUSpecificCPUDispatchDocs]; @@ -872,7 +872,7 @@ } def CPUDispatch : InheritableAttr { - let Spellings = [Clang<"cpu_dispatch">]; + let Spellings = [Clang<"cpu_dispatch">, Declspec<"cpu_dispatch">]; let Args = [VariadicIdentifierArgument<"Cpus">]; let Subjects = SubjectList<[Function]>; let Documentation = [CPUSpecificCPUDispatchDocs]; Index: include/clang/AST/Decl.h =================================================================== --- include/clang/AST/Decl.h +++ include/clang/AST/Decl.h @@ -2233,6 +2233,10 @@ /// part of the cpu_specific/cpu_dispatch functionality. bool isCPUSpecificMultiVersion() const; + /// True if this function is a multiversioned dispatch function as a part of + /// the target functionality. + bool isTargetMultiVersion() const; + void setPreviousDeclaration(FunctionDecl * PrevDecl); FunctionDecl *getCanonicalDecl() override;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits