erichkeane created this revision.
erichkeane added reviewers: echristo, rnk, aaron.ballman.
erichkeane added a subscriber: mibintc.

Similar to how ICC handles CPU-Dispatch on Windows, this patch uses the
resolver function directly to forward the call to the proper function.
This is not nearly as efficient as IFuncs of course, but is still quite
useful for large functions specifically developed for certain
processors.

This is unfortunately still limited to x86, since it depends on
__builtin_cpu_supports and __builtin_cpu_is, which are x86 builtins.

The naming for the resolver/forwarding function for cpu-dispatch was
taken from ICC's implementation, which uses the unmodified name for this
(no mangling additions).  This is possible, since cpu-dispatch uses '.A'
for the 'default' version.

In 'target' multiversioning, this function keeps the '.resolver'
suffix so that the default version can keep the default (unsuffixed)
mangling.


Repository:
  rC Clang

https://reviews.llvm.org/D53586

Files:
  include/clang/AST/Decl.h
  include/clang/Basic/Attr.td
  include/clang/Basic/TargetInfo.h
  lib/AST/Decl.cpp
  lib/Basic/Targets/X86.h
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenModule.h
  test/CodeGen/attr-cpuspecific.c
  test/CodeGen/attr-target-mv-func-ptrs.c
  test/CodeGen/attr-target-mv-va-args.c
  test/CodeGen/attr-target-mv.c
  test/CodeGenCXX/attr-target-mv-diff-ns.cpp
  test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
  test/CodeGenCXX/attr-target-mv-member-funcs.cpp
  test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
  test/CodeGenCXX/attr-target-mv-overloads.cpp
  test/Sema/attr-target-mv-bad-target.c

Index: test/Sema/attr-target-mv-bad-target.c
===================================================================
--- test/Sema/attr-target-mv-bad-target.c
+++ test/Sema/attr-target-mv-bad-target.c
@@ -1,4 +1,3 @@
-// RUN: %clang_cc1 -triple x86_64-windows-pc  -fsyntax-only -verify %s
 // RUN: %clang_cc1 -triple arm-none-eabi  -fsyntax-only -verify %s
 
 int __attribute__((target("sse4.2"))) redecl1(void) { return 1; }
Index: test/CodeGenCXX/attr-target-mv-overloads.cpp
===================================================================
--- test/CodeGenCXX/attr-target-mv-overloads.cpp
+++ test/CodeGenCXX/attr-target-mv-overloads.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 
 int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; }
 int __attribute__((target("arch=sandybridge"))) foo_overload(int);
@@ -13,38 +14,70 @@
   return foo_overload() + foo_overload(1);
 }
 
-// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
-// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver
-
-
-// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32)
-// CHECK: ret i32 0
-// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
-// CHECK: ret i32 1
-// CHECK: define i32 @_Z12foo_overloadi(i32)
-// CHECK: ret i32 2
-// CHECK: define i32 @_Z12foo_overloadv.sse4.2()
-// CHECK: ret i32 0
-// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge()
-// CHECK: ret i32 1
-// CHECK: define i32 @_Z12foo_overloadv()
-// CHECK: ret i32 2
-
-// CHECK: define i32 @_Z4bar2v()
-// CHECK: call i32 @_Z12foo_overloadv.ifunc()
-// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1)
-
-// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat
-// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge
-// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge
-// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2
-// CHECK: ret i32 ()* @_Z12foo_overloadv
-
-// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi
-
-// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge()
-// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32)
+// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
+// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver
+
+
+// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32)
+// LINUX: ret i32 0
+// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
+// LINUX: ret i32 1
+// LINUX: define i32 @_Z12foo_overloadi(i32)
+// LINUX: ret i32 2
+// LINUX: define i32 @_Z12foo_overloadv.sse4.2()
+// LINUX: ret i32 0
+// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge()
+// LINUX: ret i32 1
+// LINUX: define i32 @_Z12foo_overloadv()
+// LINUX: ret i32 2
+
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.sse4.2"(i32)
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"(i32)
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z"(i32)
+// WINDOWS: ret i32 2
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.sse4.2"()
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.arch_ivybridge"()
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ"()
+// WINDOWS: ret i32 2
+
+// LINUX: define i32 @_Z4bar2v()
+// LINUX: call i32 @_Z12foo_overloadv.ifunc()
+// LINUX: call i32 @_Z12foo_overloadi.ifunc(i32 1)
+
+// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"()
+// WINDOWS: call i32 @"?foo_overload@@YAHXZ.resolver"()
+// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.resolver"(i32 1)
+
+// LINUX: define i32 ()* @_Z12foo_overloadv.resolver() comdat
+// LINUX: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge
+// LINUX: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge
+// LINUX: ret i32 ()* @_Z12foo_overloadv.sse4.2
+// LINUX: ret i32 ()* @_Z12foo_overloadv
+
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.resolver"() comdat
+// WINDOWS: call i32 @"?foo_overload@@YAHXZ.arch_sandybridge"
+// WINDOWS: call i32 @"?foo_overload@@YAHXZ.arch_ivybridge"
+// WINDOWS: call i32 @"?foo_overload@@YAHXZ.sse4.2"
+// WINDOWS: call i32 @"?foo_overload@@YAHXZ"
+
+// LINUX: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat
+// LINUX: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge
+// LINUX: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge
+// LINUX: ret i32 (i32)* @_Z12foo_overloadi.sse4.2
+// LINUX: ret i32 (i32)* @_Z12foo_overloadi
+
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.resolver"(i32) comdat
+// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_sandybridge"
+// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"
+// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.sse4.2"
+// WINDOWS: call i32 @"?foo_overload@@YAHH@Z"
+
+// LINUX: declare i32 @_Z12foo_overloadv.arch_sandybridge()
+// LINUX: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32)
+
+// WINDOWS: declare dso_local i32 @"?foo_overload@@YAHXZ.arch_sandybridge"()
+// WINDOWS: declare dso_local i32 @"?foo_overload@@YAHH@Z.arch_sandybridge"(i32)
Index: test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
===================================================================
--- test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
+++ test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 struct S {
   int __attribute__((target("sse4.2"))) foo(int);
   int __attribute__((target("arch=sandybridge"))) foo(int);
@@ -15,25 +16,46 @@
   return s.foo(0);
 }
 
-// CHECK: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver
+// LINUX: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver
 
-// CHECK: define i32 @_ZN1S3fooEi(%struct.S* %this, i32)
-// CHECK: ret i32 2
+// LINUX: define i32 @_ZN1S3fooEi(%struct.S* %this, i32)
+// LINUX: ret i32 2
 
-// CHECK: define i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32)
-// CHECK: ret i32 0
+// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %this, i32)
+// WINDOWS: ret i32 2
 
-// CHECK: define i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32)
-// CHECK: ret i32 1
+// LINUX: define i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32)
+// LINUX: ret i32 0
 
-// CHECK: define i32 @_Z3barv()
-// CHECK: %s = alloca %struct.S, align 1
-// CHECK: %call = call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0)
+// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %this, i32)
+// WINDOWS: ret i32 0
 
-// CHECK: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat
-// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge
-// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge
-// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2
-// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi
+// LINUX: define i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32)
+// LINUX: ret i32 1
 
-// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32)
+// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %this, i32)
+// WINDOWS: ret i32 1
+
+// LINUX: define i32 @_Z3barv()
+// LINUX: %s = alloca %struct.S, align 1
+// LINUX: %call = call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0)
+
+// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"()
+// WINDOWS: %s = alloca %struct.S, align 1
+// WINDOWS: %call = call i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S* %s, i32 0)
+
+// LINUX: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat
+// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge
+// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge
+// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2
+// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi
+
+// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S*, i32) comdat
+// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S* %0, i32 %1)
+// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %0, i32 %1)
+// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %0, i32 %1)
+// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %0, i32 %1)
+
+// LINUX: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32)
+
+// WINDOWS: declare dso_local i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S*, i32)
Index: test/CodeGenCXX/attr-target-mv-member-funcs.cpp
===================================================================
--- test/CodeGenCXX/attr-target-mv-member-funcs.cpp
+++ test/CodeGenCXX/attr-target-mv-member-funcs.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 
 struct S {
   int __attribute__((target("sse4.2"))) foo(int) { return 0; }
@@ -64,82 +65,156 @@
   return a.foo(1) + b.foo(2);
 }
 
-// CHECK: @_ZN1SaSERKS_.ifunc = ifunc %struct.S* (%struct.S*, %struct.S*), %struct.S* (%struct.S*, %struct.S*)* ()* @_ZN1SaSERKS_.resolver
-// CHECK: @_ZNK9ConvertTocv1SEv.ifunc = ifunc void (%struct.ConvertTo*), void (%struct.ConvertTo*)* ()* @_ZNK9ConvertTocv1SEv.resolver
-// CHECK: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver
-// CHECK: @_ZN2S23fooEi.ifunc = ifunc i32 (%struct.S2*, i32), i32 (%struct.S2*, i32)* ()* @_ZN2S23fooEi.resolver
+// LINUX: @_ZN1SaSERKS_.ifunc = ifunc %struct.S* (%struct.S*, %struct.S*), %struct.S* (%struct.S*, %struct.S*)* ()* @_ZN1SaSERKS_.resolver
+// LINUX: @_ZNK9ConvertTocv1SEv.ifunc = ifunc void (%struct.ConvertTo*), void (%struct.ConvertTo*)* ()* @_ZNK9ConvertTocv1SEv.resolver
+// LINUX: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver
+// LINUX: @_ZN2S23fooEi.ifunc = ifunc i32 (%struct.S2*, i32), i32 (%struct.S2*, i32)* ()* @_ZN2S23fooEi.resolver
 // Templates:
-// CHECK: @_ZN5templIiE3fooEi.ifunc = ifunc i32 (%struct.templ*, i32), i32 (%struct.templ*, i32)* ()* @_ZN5templIiE3fooEi.resolver
-// CHECK: @_ZN5templIdE3fooEi.ifunc = ifunc i32 (%struct.templ.0*, i32), i32 (%struct.templ.0*, i32)* ()* @_ZN5templIdE3fooEi.resolver
-
-// CHECK: define i32 @_Z3barv()
-// CHECK: %s = alloca %struct.S, align 1
-// CHECK: %s2 = alloca %struct.S, align 1
-// CHECK: %C = alloca %struct.ConvertTo, align 1
-// CHECK: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2
-// CHECK: call void @_ZNK9ConvertTocv1SEv.ifunc(%struct.ConvertTo* %C)
-// CHECK: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2
-// CHECK: call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0)
-
-// CHECK: define %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.resolver() comdat
-// CHECK: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.arch_ivybridge
-// CHECK: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_
-
-// CHECK: define void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.resolver() comdat
-// CHECK: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.arch_ivybridge
-// CHECK: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv
-
-// CHECK: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat 
-// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge
-// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge
-// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2
-// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi
-
-// CHECK: define i32 @_Z4bar2v()
-// CHECK:call i32 @_ZN2S23fooEi.ifunc
-// define i32 (%struct.S2*, i32)* @_ZN2S23fooEi.resolver() comdat
-// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_sandybridge
-// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_ivybridge
-// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.sse4.2
-// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi
-
-// CHECK: define i32 @_ZN2S23fooEi.sse4.2(%struct.S2* %this, i32)
-// CHECK: define i32 @_ZN2S23fooEi.arch_ivybridge(%struct.S2* %this, i32)
-// CHECK: define i32 @_ZN2S23fooEi(%struct.S2* %this, i32)
-
-// CHECK: define i32 @_Z9templ_usev()
-// CHECK:  call i32 @_ZN5templIiE3fooEi.ifunc
-// CHECK:  call i32 @_ZN5templIdE3fooEi.ifunc
-
-// CHECK: define i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.resolver() comdat
-// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_sandybridge
-// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_ivybridge
-// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.sse4.2
-// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi
-
-// CHECK: define i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.resolver() comdat
-// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_sandybridge
-// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_ivybridge
-// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.sse4.2
-// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi
-
-// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32)
-// CHECK: ret i32 0
-
-// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32)
-
-// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32)
-// CHECK: ret i32 1
-
-// CHECK: define linkonce_odr i32 @_ZN1S3fooEi(%struct.S* %this, i32)
-// CHECK: ret i32 2
-
-// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi.sse4.2
-// CHECK: declare i32 @_ZN5templIiE3fooEi.arch_sandybridge
-// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi.arch_ivybridge
-// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi
-
-// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi.sse4.2
-// CHECK: declare i32 @_ZN5templIdE3fooEi.arch_sandybridge
-// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi.arch_ivybridge
-// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi
+// LINUX: @_ZN5templIiE3fooEi.ifunc = ifunc i32 (%struct.templ*, i32), i32 (%struct.templ*, i32)* ()* @_ZN5templIiE3fooEi.resolver
+// LINUX: @_ZN5templIdE3fooEi.ifunc = ifunc i32 (%struct.templ.0*, i32), i32 (%struct.templ.0*, i32)* ()* @_ZN5templIdE3fooEi.resolver
+
+// LINUX: define i32 @_Z3barv()
+// LINUX: %s = alloca %struct.S, align 1
+// LINUX: %s2 = alloca %struct.S, align 1
+// LINUX: %C = alloca %struct.ConvertTo, align 1
+// LINUX: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2
+// LINUX: call void @_ZNK9ConvertTocv1SEv.ifunc(%struct.ConvertTo* %C)
+// LINUX: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2
+// LINUX: call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0)
+
+// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"()
+// WINDOWS: %s = alloca %struct.S, align 1
+// WINDOWS: %s2 = alloca %struct.S, align 1
+// WINDOWS: %C = alloca %struct.ConvertTo, align 1
+// WINDOWS: call dereferenceable(1) %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S* %s2
+// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ.resolver"(%struct.ConvertTo* %C
+// WINDOWS: call dereferenceable(1) %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S* %s2
+// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S* %s, i32 0)
+
+// LINUX: define %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.resolver() comdat
+// LINUX: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.arch_ivybridge
+// LINUX: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_
+
+// WINDOWS: define dso_local %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S*, %struct.S*)
+// WINDOWS: call %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.arch_ivybridge"
+// WINDOWS: call %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z"
+
+// LINUX: define void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.resolver() comdat
+// LINUX: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.arch_ivybridge
+// LINUX: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv
+
+// WINDOWS: define dso_local void @"??BConvertTo@@QEBA?AUS@@XZ.resolver"(%struct.ConvertTo*, %struct.S*)
+// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ.arch_ivybridge"
+// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ"
+
+// LINUX: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat 
+// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge
+// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge
+// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2
+// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi
+
+// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S*, i32)
+// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"
+// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"
+// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.sse4.2"
+// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z"
+
+// LINUX: define i32 @_Z4bar2v()
+// LINUX: call i32 @_ZN2S23fooEi.ifunc
+
+// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"()
+// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.resolver"
+
+// LINUX: define i32 (%struct.S2*, i32)* @_ZN2S23fooEi.resolver() comdat
+// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_sandybridge
+// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_ivybridge
+// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.sse4.2
+// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi
+
+// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.resolver"(%struct.S2*, i32)
+// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.arch_sandybridge"
+// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.arch_ivybridge"
+// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.sse4.2"
+// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z"
+
+// LINUX: define i32 @_ZN2S23fooEi.sse4.2(%struct.S2* %this, i32)
+// LINUX: define i32 @_ZN2S23fooEi.arch_ivybridge(%struct.S2* %this, i32)
+// LINUX: define i32 @_ZN2S23fooEi(%struct.S2* %this, i32)
+
+// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.sse4.2"(%struct.S2* %this, i32)
+// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.arch_ivybridge"(%struct.S2* %this, i32)
+// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z"(%struct.S2* %this, i32)
+
+// LINUX: define i32 @_Z9templ_usev()
+// LINUX: call i32 @_ZN5templIiE3fooEi.ifunc
+// LINUX: call i32 @_ZN5templIdE3fooEi.ifunc
+
+// WINDOWS: define dso_local i32 @"?templ_use@@YAHXZ"()
+// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.resolver"
+// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.resolver"
+
+// LINUX: define i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.resolver() comdat
+// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_sandybridge
+// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_ivybridge
+// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.sse4.2
+// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi
+
+// WINDOWS: define dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.resolver"(%struct.templ*, i32)
+// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_sandybridge"
+// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_ivybridge"
+// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.sse4.2"
+// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z"
+
+// LINUX: define i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.resolver() comdat
+// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_sandybridge
+// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_ivybridge
+// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.sse4.2
+// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi
+
+// WINDOWS: define dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.resolver"(%struct.templ.0*, i32) comdat
+// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_sandybridge"
+// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_ivybridge"
+// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.sse4.2"
+// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z"
+
+// LINUX: define linkonce_odr i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32)
+// LINUX: ret i32 0
+
+// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %this, i32)
+// WINDOWS: ret i32 0
+
+// LINUX: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32)
+
+// WINDOWS: declare dso_local i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S*, i32)
+
+// LINUX: define linkonce_odr i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32)
+// LINUX: ret i32 1
+
+// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %this, i32)
+// WINDOWS: ret i32 1
+
+// LINUX: define linkonce_odr i32 @_ZN1S3fooEi(%struct.S* %this, i32)
+// LINUX: ret i32 2
+
+// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %this, i32)
+// WINDOWS: ret i32 2
+
+// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi.sse4.2
+// LINUX: declare i32 @_ZN5templIiE3fooEi.arch_sandybridge
+// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi.arch_ivybridge
+// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi
+
+// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.sse4.2"
+// WINDOWS: declare dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_sandybridge"
+// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_ivybridge"
+// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z"
+
+// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi.sse4.2
+// LINUX: declare i32 @_ZN5templIdE3fooEi.arch_sandybridge
+// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi.arch_ivybridge
+// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi
+
+// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.sse4.2"
+// WINDOWS: declare dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_sandybridge"
+// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_ivybridge"
+// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z"
Index: test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
===================================================================
--- test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
+++ test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 void temp();
 void temp(int);
 using FP = void(*)(int);
@@ -32,14 +33,23 @@
 }
 
 
-// CHECK: @_Z3fooi.ifunc 
-// CHECK: @_ZN1S3fooEi.ifunc
+// LINUX: @_Z3fooi.ifunc 
+// LINUX: @_ZN1S3fooEi.ifunc
 
-// CHECK: define i32 @_Z3barv()
+// LINUX: define i32 @_Z3barv()
 // Store to Free of ifunc
-// CHECK: store i32 (i32)* @_Z3fooi.ifunc
+// LINUX: store i32 (i32)* @_Z3fooi.ifunc
 // Store to Member of ifunc
-// CHECK: store { i64, i64 } { i64 ptrtoint (i32 (%struct.S*, i32)* @_ZN1S3fooEi.ifunc to i64), i64 0 }, { i64, i64 }* [[MEMBER:%[a-z]+]]
+// LINUX: store { i64, i64 } { i64 ptrtoint (i32 (%struct.S*, i32)* @_ZN1S3fooEi.ifunc to i64), i64 0 }, { i64, i64 }* [[MEMBER:%[a-z]+]]
 
 // Call to 'f' with the ifunc
-// CHECK: call void @_Z1fPFiiEM1SFiiE(i32 (i32)* @_Z3fooi.ifunc
+// LINUX: call void @_Z1fPFiiEM1SFiiE(i32 (i32)* @_Z3fooi.ifunc
+
+// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"()
+// Store to Free
+// WINDOWS: store i32 (i32)* @"?foo@@YAHH@Z.resolver", i32 (i32)**
+// Store to Member
+// WINDOWS: store i8* bitcast (i32 (%struct.S*, i32)* @"?foo@S@@QEAAHH@Z.resolver" to i8*), i8**
+
+// Call to 'f'
+// WINDOWS: call void @"?f@@YAXP6AHH@ZP8S@@EAAHH@Z@Z"(i32 (i32)* @"?foo@@YAHH@Z.resolver", i8* bitcast (i32 (%struct.S*, i32)* @"?foo@S@@QEAAHH@Z.resolver" to i8*))
Index: test/CodeGenCXX/attr-target-mv-diff-ns.cpp
===================================================================
--- test/CodeGenCXX/attr-target-mv-diff-ns.cpp
+++ test/CodeGenCXX/attr-target-mv-diff-ns.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 // Test ensures that this properly differentiates between types in different 
 // namespaces.
 int __attribute__((target("sse4.2"))) foo(int) { return 0; }
@@ -17,38 +18,71 @@
   return foo(1) + ns::foo(2);
 }
 
-// CHECK: @_Z3fooi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z3fooi.resolver
-// CHECK: @_ZN2ns3fooEi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_ZN2ns3fooEi.resolver
-
-// CHECK: define i32 @_Z3fooi.sse4.2(i32)
-// CHECK: ret i32 0
-// CHECK: define i32 @_Z3fooi.arch_ivybridge(i32)
-// CHECK: ret i32 1
-// CHECK: define i32 @_Z3fooi(i32)
-// CHECK: ret i32 2
-
-// CHECK: define i32 @_ZN2ns3fooEi.sse4.2(i32)
-// CHECK: ret i32 0
-// CHECK: define i32 @_ZN2ns3fooEi.arch_ivybridge(i32)
-// CHECK: ret i32 1
-// CHECK: define i32 @_ZN2ns3fooEi(i32)
-// CHECK: ret i32 2
-
-// CHECK: define i32 @_Z3barv()
-// CHECK: call i32 @_Z3fooi.ifunc(i32 1)
-// CHECK: call i32 @_ZN2ns3fooEi.ifunc(i32 2)
-
-// CHECK: define i32 (i32)* @_Z3fooi.resolver() comdat
-// CHECK: ret i32 (i32)* @_Z3fooi.arch_sandybridge
-// CHECK: ret i32 (i32)* @_Z3fooi.arch_ivybridge
-// CHECK: ret i32 (i32)* @_Z3fooi.sse4.2
-// CHECK: ret i32 (i32)* @_Z3fooi
-//
-// CHECK: define i32 (i32)* @_ZN2ns3fooEi.resolver() comdat
-// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.arch_sandybridge
-// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.arch_ivybridge
-// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.sse4.2
-// CHECK: ret i32 (i32)* @_ZN2ns3fooEi
-
-// CHECK: declare i32 @_Z3fooi.arch_sandybridge(i32)
-// CHECK: declare i32 @_ZN2ns3fooEi.arch_sandybridge(i32)
+// LINUX: @_Z3fooi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z3fooi.resolver
+// LINUX: @_ZN2ns3fooEi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_ZN2ns3fooEi.resolver
+
+// LINUX: define i32 @_Z3fooi.sse4.2(i32)
+// LINUX: ret i32 0
+// LINUX: define i32 @_Z3fooi.arch_ivybridge(i32)
+// LINUX: ret i32 1
+// LINUX: define i32 @_Z3fooi(i32)
+// LINUX: ret i32 2
+
+// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z.sse4.2"(i32)
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z.arch_ivybridge"(i32)
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z"(i32)
+// WINDOWS: ret i32 2
+
+// LINUX: define i32 @_ZN2ns3fooEi.sse4.2(i32)
+// LINUX: ret i32 0
+// LINUX: define i32 @_ZN2ns3fooEi.arch_ivybridge(i32)
+// LINUX: ret i32 1
+// LINUX: define i32 @_ZN2ns3fooEi(i32)
+// LINUX: ret i32 2
+
+// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.sse4.2"(i32)
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.arch_ivybridge"(i32)
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z"(i32)
+// WINDOWS: ret i32 2
+
+// LINUX: define i32 @_Z3barv()
+// LINUX: call i32 @_Z3fooi.ifunc(i32 1)
+// LINUX: call i32 @_ZN2ns3fooEi.ifunc(i32 2)
+
+// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"()
+// WINDOWS: call i32 @"?foo@@YAHH@Z.resolver"(i32 1)
+// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.resolver"(i32 2)
+
+// LINUX: define i32 (i32)* @_Z3fooi.resolver() comdat
+// LINUX: ret i32 (i32)* @_Z3fooi.arch_sandybridge
+// LINUX: ret i32 (i32)* @_Z3fooi.arch_ivybridge
+// LINUX: ret i32 (i32)* @_Z3fooi.sse4.2
+// LINUX: ret i32 (i32)* @_Z3fooi
+
+// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z.resolver"(i32) comdat
+// WINDOWS: call i32 @"?foo@@YAHH@Z.arch_sandybridge"(i32 %0)
+// WINDOWS: call i32 @"?foo@@YAHH@Z.arch_ivybridge"(i32 %0)
+// WINDOWS: call i32 @"?foo@@YAHH@Z.sse4.2"(i32 %0)
+// WINDOWS: call i32 @"?foo@@YAHH@Z"(i32 %0)
+
+// LINUX: define i32 (i32)* @_ZN2ns3fooEi.resolver() comdat
+// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.arch_sandybridge
+// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.arch_ivybridge
+// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.sse4.2
+// LINUX: ret i32 (i32)* @_ZN2ns3fooEi
+
+// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.resolver"(i32) comdat
+// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.arch_sandybridge"(i32 %0)
+// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.arch_ivybridge"(i32 %0)
+// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.sse4.2"(i32 %0)
+// WINDOWS: call i32 @"?foo@ns@@YAHH@Z"(i32 %0)
+
+// LINUX: declare i32 @_Z3fooi.arch_sandybridge(i32)
+// LINUX: declare i32 @_ZN2ns3fooEi.arch_sandybridge(i32)
+
+// WINDOWS: declare dso_local i32 @"?foo@@YAHH@Z.arch_sandybridge"(i32)
+// WINDOWS: declare dso_local i32 @"?foo@ns@@YAHH@Z.arch_sandybridge"(i32)
Index: test/CodeGen/attr-target-mv.c
===================================================================
--- test/CodeGen/attr-target-mv.c
+++ test/CodeGen/attr-target-mv.c
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
+
 int __attribute__((target("sse4.2"))) foo(void) { return 0; }
 int __attribute__((target("arch=sandybridge"))) foo(void);
 int __attribute__((target("arch=ivybridge"))) foo(void) {return 1;}
@@ -25,82 +27,155 @@
 inline __attribute__((target("default"))) void foo_decls(void) {}
 inline __attribute__((target("sse4.2"))) void foo_decls(void) {}
 
-inline __attribute__((target("default"))) void foo_multi(void) {}
-inline __attribute__((target("avx,sse4.2"))) void foo_multi(void) {}
-inline __attribute__((target("sse4.2,fma4"))) void foo_multi(void) {}
-inline __attribute__((target("arch=ivybridge,fma4,sse4.2"))) void foo_multi(void) {}
+inline __attribute__((target("default"))) void foo_multi(int i, double d) {}
+inline __attribute__((target("avx,sse4.2"))) void foo_multi(int i, double d) {}
+inline __attribute__((target("sse4.2,fma4"))) void foo_multi(int i, double d) {}
+inline __attribute__((target("arch=ivybridge,fma4,sse4.2"))) void foo_multi(int i, double d) {}
 void bar4() {
-  foo_multi();
+  foo_multi(1, 5.0);
 }
 
-// CHECK: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver
-// CHECK: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver
-// CHECK: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver
-
-// CHECK: define i32 @foo.sse4.2()
-// CHECK: ret i32 0
-// CHECK: define i32 @foo.arch_ivybridge()
-// CHECK: ret i32 1
-// CHECK: define i32 @foo()
-// CHECK: ret i32 2
-// CHECK: define i32 @bar()
-// CHECK: call i32 @foo.ifunc()
-
-// CHECK: define i32 ()* @foo.resolver() comdat
-// CHECK: call void @__cpu_indicator_init()
-// CHECK: ret i32 ()* @foo.arch_sandybridge
-// CHECK: ret i32 ()* @foo.arch_ivybridge
-// CHECK: ret i32 ()* @foo.sse4.2
-// CHECK: ret i32 ()* @foo
-
-// CHECK: define i32 @bar2()
-// CHECK: call i32 @foo_inline.ifunc()
-
-// CHECK: define i32 ()* @foo_inline.resolver() comdat
-// CHECK: call void @__cpu_indicator_init()
-// CHECK: ret i32 ()* @foo_inline.arch_sandybridge
-// CHECK: ret i32 ()* @foo_inline.arch_ivybridge
-// CHECK: ret i32 ()* @foo_inline.sse4.2
-// CHECK: ret i32 ()* @foo_inline
-
-// CHECK: define void @bar3()
-// CHECK: call void @foo_decls.ifunc()
-
-// CHECK: define void ()* @foo_decls.resolver() comdat
-// CHECK: ret void ()* @foo_decls.sse4.2
-// CHECK: ret void ()* @foo_decls
-
-// CHECK: define void @bar4()
-// CHECK: call void @foo_multi.ifunc()
-
-// CHECK: define void ()* @foo_multi.resolver() comdat
-// CHECK: and i32 %{{.*}}, 4352
-// CHECK: icmp eq i32 %{{.*}}, 4352
-// CHECK: ret void ()* @foo_multi.fma4_sse4.2
-// CHECK: icmp eq i32 %{{.*}}, 12
-// CHECK: and i32 %{{.*}}, 4352
-// CHECK: icmp eq i32 %{{.*}}, 4352
-// CHECK: ret void ()* @foo_multi.arch_ivybridge_fma4_sse4.2
-// CHECK: and i32 %{{.*}}, 768
-// CHECK: icmp eq i32 %{{.*}}, 768
-// CHECK: ret void ()* @foo_multi.avx_sse4.2
-// CHECK: ret void ()* @foo_multi
-
-// CHECK: declare i32 @foo.arch_sandybridge()
-
-// CHECK: define linkonce i32 @foo_inline.sse4.2()
-// CHECK: ret i32 0
-
-// CHECK: declare i32 @foo_inline.arch_sandybridge()
-//
-// CHECK: define linkonce i32 @foo_inline.arch_ivybridge()
-// CHECK: ret i32 1
-// CHECK: define linkonce i32 @foo_inline()
-// CHECK: ret i32 2
-
-// CHECK: define linkonce void @foo_decls()
-// CHECK: define linkonce void @foo_decls.sse4.2()
-
-// CHECK: define linkonce void @foo_multi.avx_sse4.2()
-// CHECK: define linkonce void @foo_multi.fma4_sse4.2()
-// CHECK: define linkonce void @foo_multi.arch_ivybridge_fma4_sse4.2()
+// LINUX: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver
+// LINUX: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver
+// LINUX: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver
+// LINUX: @foo_multi.ifunc = ifunc void (i32, double), void (i32, double)* ()* @foo_multi.resolver
+
+// LINUX: define i32 @foo.sse4.2()
+// LINUX: ret i32 0
+// LINUX: define i32 @foo.arch_ivybridge()
+// LINUX: ret i32 1
+// LINUX: define i32 @foo()
+// LINUX: ret i32 2
+// LINUX: define i32 @bar()
+// LINUX: call i32 @foo.ifunc()
+
+// WINDOWS: define dso_local i32 @foo.sse4.2()
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @foo.arch_ivybridge()
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @foo()
+// WINDOWS: ret i32 2
+// WINDOWS: define dso_local i32 @bar()
+// WINDOWS: call i32 @foo.resolver()
+
+// LINUX: define i32 ()* @foo.resolver() comdat
+// LINUX: call void @__cpu_indicator_init()
+// LINUX: ret i32 ()* @foo.arch_sandybridge
+// LINUX: ret i32 ()* @foo.arch_ivybridge
+// LINUX: ret i32 ()* @foo.sse4.2
+// LINUX: ret i32 ()* @foo
+
+// WINDOWS: define dso_local i32 @foo.resolver() comdat
+// WINDOWS: call void @__cpu_indicator_init()
+// WINDOWS: call i32 @foo.arch_sandybridge
+// WINDOWS: call i32 @foo.arch_ivybridge
+// WINDOWS: call i32 @foo.sse4.2
+// WINDOWS: call i32 @foo
+
+// LINUX: define i32 @bar2()
+// LINUX: call i32 @foo_inline.ifunc()
+
+// WINDOWS: define dso_local i32 @bar2()
+// WINDOWS: call i32 @foo_inline.resolver()
+
+// LINUX: define i32 ()* @foo_inline.resolver() comdat
+// LINUX: call void @__cpu_indicator_init()
+// LINUX: ret i32 ()* @foo_inline.arch_sandybridge
+// LINUX: ret i32 ()* @foo_inline.arch_ivybridge
+// LINUX: ret i32 ()* @foo_inline.sse4.2
+// LINUX: ret i32 ()* @foo_inline
+
+// WINDOWS: define dso_local i32 @foo_inline.resolver() comdat
+// WINDOWS: call void @__cpu_indicator_init()
+// WINDOWS: call i32 @foo_inline.arch_sandybridge
+// WINDOWS: call i32 @foo_inline.arch_ivybridge
+// WINDOWS: call i32 @foo_inline.sse4.2
+// WINDOWS: call i32 @foo_inline
+
+// LINUX: define void @bar3()
+// LINUX: call void @foo_decls.ifunc()
+
+// WINDOWS: define dso_local void @bar3()
+// WINDOWS: call void @foo_decls.resolver()
+
+// LINUX: define void ()* @foo_decls.resolver() comdat
+// LINUX: ret void ()* @foo_decls.sse4.2
+// LINUX: ret void ()* @foo_decls
+
+// WINDOWS: define dso_local void @foo_decls.resolver() comdat
+// WINDOWS: call void @foo_decls.sse4.2
+// WINDOWS: call void @foo_decls
+
+// LINUX: define void @bar4()
+// LINUX: call void @foo_multi.ifunc(i32 1, double 5.{{[0+e]*}})
+
+// WINDOWS: define dso_local void @bar4()
+// WINDOWS: call void @foo_multi.resolver(i32 1, double 5.{{[0+e]*}})
+
+// LINUX: define void (i32, double)* @foo_multi.resolver() comdat
+// LINUX: and i32 %{{.*}}, 4352
+// LINUX: icmp eq i32 %{{.*}}, 4352
+// LINUX: ret void (i32, double)* @foo_multi.fma4_sse4.2
+// LINUX: icmp eq i32 %{{.*}}, 12
+// LINUX: and i32 %{{.*}}, 4352
+// LINUX: icmp eq i32 %{{.*}}, 4352
+// LINUX: ret void (i32, double)* @foo_multi.arch_ivybridge_fma4_sse4.2
+// LINUX: and i32 %{{.*}}, 768
+// LINUX: icmp eq i32 %{{.*}}, 768
+// LINUX: ret void (i32, double)* @foo_multi.avx_sse4.2
+// LINUX: ret void (i32, double)* @foo_multi
+
+// WINDOWS: define dso_local void @foo_multi.resolver(i32, double) comdat
+// WINDOWS: and i32 %{{.*}}, 4352
+// WINDOWS: icmp eq i32 %{{.*}}, 4352
+// WINDOWS: call void @foo_multi.fma4_sse4.2(i32 %0, double %1)
+// WINDOWS-NEXT: ret void
+// WINDOWS: icmp eq i32 %{{.*}}, 12
+// WINDOWS: and i32 %{{.*}}, 4352
+// WINDOWS: icmp eq i32 %{{.*}}, 4352
+// WINDOWS: call void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %0, double %1)
+// WINDOWS-NEXT: ret void
+// WINDOWS: and i32 %{{.*}}, 768
+// WINDOWS: icmp eq i32 %{{.*}}, 768
+// WINDOWS: call void @foo_multi.avx_sse4.2(i32 %0, double %1)
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @foo_multi(i32 %0, double %1)
+// WINDOWS-NEXT: ret void
+
+// LINUX: declare i32 @foo.arch_sandybridge()
+
+// WINDOWS: declare dso_local i32 @foo.arch_sandybridge()
+
+// LINUX: define linkonce i32 @foo_inline.sse4.2()
+// LINUX: ret i32 0
+
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.sse4.2()
+// WINDOWS: ret i32 0
+
+// LINUX: declare i32 @foo_inline.arch_sandybridge()
+
+// WINDOWS: declare dso_local i32 @foo_inline.arch_sandybridge()
+
+// LINUX: define linkonce i32 @foo_inline.arch_ivybridge()
+// LINUX: ret i32 1
+// LINUX: define linkonce i32 @foo_inline()
+// LINUX: ret i32 2
+
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.arch_ivybridge()
+// WINDOWS: ret i32 1
+// WINDOWS: define linkonce_odr dso_local i32 @foo_inline()
+// WINDOWS: ret i32 2
+
+// LINUX: define linkonce void @foo_decls()
+// LINUX: define linkonce void @foo_decls.sse4.2()
+
+// WINDOWS: define linkonce_odr dso_local void @foo_decls()
+// WINDOWS: define linkonce_odr dso_local void @foo_decls.sse4.2()
+
+// LINUX: define linkonce void @foo_multi.avx_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
+// LINUX: define linkonce void @foo_multi.fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
+// LINUX: define linkonce void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
+
+// WINDOWS: define linkonce_odr dso_local void @foo_multi.avx_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
+// WINDOWS: define linkonce_odr dso_local void @foo_multi.fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
+// WINDOWS: define linkonce_odr dso_local void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
Index: test/CodeGen/attr-target-mv-va-args.c
===================================================================
--- test/CodeGen/attr-target-mv-va-args.c
+++ test/CodeGen/attr-target-mv-va-args.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 int __attribute__((target("sse4.2"))) foo(int i, ...) { return 0; }
 int __attribute__((target("arch=sandybridge"))) foo(int i, ...);
 int __attribute__((target("arch=ivybridge"))) foo(int i, ...) {return 1;}
@@ -8,19 +9,37 @@
   return foo(1, 'a', 1.1) + foo(2, 2.2, "asdf");
 }
 
-// CHECK: @foo.ifunc = ifunc i32 (i32, ...), i32 (i32, ...)* ()* @foo.resolver
-// CHECK: define i32 @foo.sse4.2(i32 %i, ...)
-// CHECK: ret i32 0
-// CHECK: define i32 @foo.arch_ivybridge(i32 %i, ...)
-// CHECK: ret i32 1
-// CHECK: define i32 @foo(i32 %i, ...)
-// CHECK: ret i32 2
-// CHECK: define i32 @bar()
-// CHECK: call i32 (i32, ...) @foo.ifunc(i32 1, i32 97, double
-// CHECK: call i32 (i32, ...) @foo.ifunc(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds 
-// CHECK: define i32 (i32, ...)* @foo.resolver() comdat
-// CHECK: ret i32 (i32, ...)* @foo.arch_sandybridge
-// CHECK: ret i32 (i32, ...)* @foo.arch_ivybridge
-// CHECK: ret i32 (i32, ...)* @foo.sse4.2
-// CHECK: ret i32 (i32, ...)* @foo
-// CHECK: declare i32 @foo.arch_sandybridge(i32, ...)
+// LINUX: @foo.ifunc = ifunc i32 (i32, ...), i32 (i32, ...)* ()* @foo.resolver
+// LINUX: define i32 @foo.sse4.2(i32 %i, ...)
+// LINUX: ret i32 0
+// LINUX: define i32 @foo.arch_ivybridge(i32 %i, ...)
+// LINUX: ret i32 1
+// LINUX: define i32 @foo(i32 %i, ...)
+// LINUX: ret i32 2
+// LINUX: define i32 @bar()
+// LINUX: call i32 (i32, ...) @foo.ifunc(i32 1, i32 97, double
+// LINUX: call i32 (i32, ...) @foo.ifunc(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds 
+
+// LINUX: define i32 (i32, ...)* @foo.resolver() comdat
+// LINUX: ret i32 (i32, ...)* @foo.arch_sandybridge
+// LINUX: ret i32 (i32, ...)* @foo.arch_ivybridge
+// LINUX: ret i32 (i32, ...)* @foo.sse4.2
+// LINUX: ret i32 (i32, ...)* @foo
+// LINUX: declare i32 @foo.arch_sandybridge(i32, ...)
+
+// WINDOWS: define dso_local i32 @foo.sse4.2(i32 %i, ...)
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @foo.arch_ivybridge(i32 %i, ...)
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @foo(i32 %i, ...)
+// WINDOWS: ret i32 2
+// WINDOWS: define dso_local i32 @bar()
+// WINDOWS: call i32 (i32, ...) @foo.resolver(i32 1, i32 97, double
+// WINDOWS: call i32 (i32, ...) @foo.resolver(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds 
+
+// WINDOWS: define dso_local i32 @foo.resolver(i32, ...) comdat
+// WINDOWS: call i32 (i32, ...) @foo.arch_sandybridge
+// WINDOWS: call i32 (i32, ...) @foo.arch_ivybridge
+// WINDOWS: call i32 (i32, ...) @foo.sse4.2
+// WINDOWS: call i32 (i32, ...) @foo
+// WINDOWS: declare dso_local i32 @foo.arch_sandybridge(i32, ...)
Index: test/CodeGen/attr-target-mv-func-ptrs.c
===================================================================
--- test/CodeGen/attr-target-mv-func-ptrs.c
+++ test/CodeGen/attr-target-mv-func-ptrs.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 int __attribute__((target("sse4.2"))) foo(int i) { return 0; }
 int __attribute__((target("arch=sandybridge"))) foo(int);
 int __attribute__((target("arch=ivybridge"))) foo(int i) {return 1;}
@@ -16,17 +17,31 @@
   return Free(1) + Free(2);
 }
 
-// CHECK: @foo.ifunc = ifunc i32 (i32), i32 (i32)* ()* @foo.resolver
-// CHECK: define i32 @foo.sse4.2(
-// CHECK: ret i32 0
-// CHECK: define i32 @foo.arch_ivybridge(
-// CHECK: ret i32 1
-// CHECK: define i32 @foo(
-// CHECK: ret i32 2
+// LINUX: @foo.ifunc = ifunc i32 (i32), i32 (i32)* ()* @foo.resolver
+// LINUX: define i32 @foo.sse4.2(
+// LINUX: ret i32 0
+// LINUX: define i32 @foo.arch_ivybridge(
+// LINUX: ret i32 1
+// LINUX: define i32 @foo(
+// LINUX: ret i32 2
 
-// CHECK: define i32 @bar()
-// CHECK: call void @func(i32 (i32)* @foo.ifunc)
-// CHECK: store i32 (i32)* @foo.ifunc
-// CHECK: store i32 (i32)* @foo.ifunc
+// WINDOWS: define dso_local i32 @foo.sse4.2(
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @foo.arch_ivybridge(
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @foo(
+// WINDOWS: ret i32 2
 
-// CHECK: declare i32 @foo.arch_sandybridge(
+// LINUX: define i32 @bar()
+// LINUX: call void @func(i32 (i32)* @foo.ifunc)
+// LINUX: store i32 (i32)* @foo.ifunc
+// LINUX: store i32 (i32)* @foo.ifunc
+
+// WINDOWS: define dso_local i32 @bar()
+// WINDOWS: call void @func(i32 (i32)* @foo.resolver)
+// WINDOWS: store i32 (i32)* @foo.resolver
+// WINDOWS: store i32 (i32)* @foo.resolver
+
+// LINUX: declare i32 @foo.arch_sandybridge(
+
+// WINDOWS: declare dso_local i32 @foo.arch_sandybridge(
Index: test/CodeGen/attr-cpuspecific.c
===================================================================
--- test/CodeGen/attr-cpuspecific.c
+++ test/CodeGen/attr-cpuspecific.c
@@ -1,100 +1,210 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LINUX
+// RUN: %clang_cc1 -triple x86_64-windows-pc -fms-compatibility -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WINDOWS
+
+#ifdef _WIN64
+#define ATTR(X) __declspec(X)
+#else
+#define ATTR(X) __attribute__((X))
+#endif // _WIN64
 
 
 // Each called version should have an IFunc.
-// CHECK: @SingleVersion.ifunc = ifunc void (), void ()* ()* @SingleVersion.resolver
-// CHECK: @TwoVersions.ifunc = ifunc void (), void ()* ()* @TwoVersions.resolver
-// CHECK: @TwoVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @TwoVersionsSameAttr.resolver
-// CHECK: @ThreeVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @ThreeVersionsSameAttr.resolver
+// LINUX: @SingleVersion.ifunc = ifunc void (), void ()* ()* @SingleVersion.resolver
+// LINUX: @TwoVersions.ifunc = ifunc void (), void ()* ()* @TwoVersions.resolver
+// LINUX: @TwoVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @TwoVersionsSameAttr.resolver
+// LINUX: @ThreeVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @ThreeVersionsSameAttr.resolver
 
-__attribute__((cpu_specific(ivybridge)))
+ATTR(cpu_specific(ivybridge))
 void SingleVersion(void){}
-// CHECK: define void @SingleVersion.S() #[[S:[0-9]+]]
+// LINUX: define void @SingleVersion.S() #[[S:[0-9]+]]
+// WINDOWS: define dso_local void @SingleVersion.S() #[[S:[0-9]+]]
 
-__attribute__((cpu_specific(ivybridge)))
+ATTR(cpu_specific(ivybridge))
 void NotCalled(void){}
-// CHECK: define void @NotCalled.S() #[[S]]
+// LINUX: define void @NotCalled.S() #[[S]]
+// WINDOWS: define dso_local void @NotCalled.S() #[[S]]
 
 // Done before any of the implementations.
-__attribute__((cpu_dispatch(ivybridge, knl)))
+ATTR(cpu_dispatch(ivybridge, knl))
 void TwoVersions(void);
-// CHECK: define void ()* @TwoVersions.resolver()
-// CHECK: call void @__cpu_indicator_init
-// CHECK: ret void ()* @TwoVersions.Z
-// CHECK: ret void ()* @TwoVersions.S
-// CHECK: call void @llvm.trap
-// CHECK: unreachable
-
-__attribute__((cpu_specific(ivybridge)))
+// LINUX: define void ()* @TwoVersions.resolver()
+// LINUX: call void @__cpu_indicator_init
+// LINUX: ret void ()* @TwoVersions.Z
+// LINUX: ret void ()* @TwoVersions.S
+// LINUX: call void @llvm.trap
+// LINUX: unreachable
+
+// WINDOWS: define dso_local void @TwoVersions()
+// WINDOWS: call void @__cpu_indicator_init()
+// WINDOWS: call void @TwoVersions.Z()
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @TwoVersions.S()
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @llvm.trap
+// WINDOWS: unreachable
+
+ATTR(cpu_specific(ivybridge))
 void TwoVersions(void){}
-// CHECK: define void @TwoVersions.S() #[[S]]
+// CHECK: define {{.*}}void @TwoVersions.S() #[[S]]
 
-__attribute__((cpu_specific(knl)))
+ATTR(cpu_specific(knl))
 void TwoVersions(void){}
-// CHECK: define void @TwoVersions.Z() #[[K:[0-9]+]]
+// CHECK: define {{.*}}void @TwoVersions.Z() #[[K:[0-9]+]]
 
-__attribute__((cpu_specific(ivybridge, knl)))
+ATTR(cpu_specific(ivybridge, knl))
 void TwoVersionsSameAttr(void){}
-// CHECK: define void @TwoVersionsSameAttr.S() #[[S]]
-// CHECK: define void @TwoVersionsSameAttr.Z() #[[K]]
+// CHECK: define {{.*}}void @TwoVersionsSameAttr.S() #[[S]]
+// CHECK: define {{.*}}void @TwoVersionsSameAttr.Z() #[[K]]
 
-__attribute__((cpu_specific(atom, ivybridge, knl)))
+ATTR(cpu_specific(atom, ivybridge, knl))
 void ThreeVersionsSameAttr(void){}
-// CHECK: define void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]]
-// CHECK: define void @ThreeVersionsSameAttr.S() #[[S]]
-// CHECK: define void @ThreeVersionsSameAttr.Z() #[[K]]
+// CHECK: define {{.*}}void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]]
+// CHECK: define {{.*}}void @ThreeVersionsSameAttr.S() #[[S]]
+// CHECK: define {{.*}}void @ThreeVersionsSameAttr.Z() #[[K]]
 
 void usages() {
   SingleVersion();
-  // CHECK: @SingleVersion.ifunc()
+  // LINUX: @SingleVersion.ifunc()
+  // WINDOWS: @SingleVersion()
   TwoVersions();
-  // CHECK: @TwoVersions.ifunc()
+  // LINUX: @TwoVersions.ifunc()
+  // WINDOWS: @TwoVersions()
   TwoVersionsSameAttr();
-  // CHECK: @TwoVersionsSameAttr.ifunc()
+  // LINUX: @TwoVersionsSameAttr.ifunc()
+  // WINDOWS: @TwoVersionsSameAttr()
   ThreeVersionsSameAttr();
-  // CHECK: @ThreeVersionsSameAttr.ifunc()
+  // LINUX: @ThreeVersionsSameAttr.ifunc()
+  // WINDOWS: @ThreeVersionsSameAttr()
 }
 
 // has an extra config to emit!
-__attribute__((cpu_dispatch(ivybridge, knl, atom)))
+ATTR(cpu_dispatch(ivybridge, knl, atom))
 void TwoVersionsSameAttr(void);
-// CHECK: define void ()* @TwoVersionsSameAttr.resolver()
-// CHECK: ret void ()* @TwoVersionsSameAttr.Z
-// CHECK: ret void ()* @TwoVersionsSameAttr.S
-// CHECK: ret void ()* @TwoVersionsSameAttr.O
-// CHECK: call void @llvm.trap
-// CHECK: unreachable
-
-__attribute__((cpu_dispatch(atom, ivybridge, knl)))
+// LINUX: define void ()* @TwoVersionsSameAttr.resolver()
+// LINUX: ret void ()* @TwoVersionsSameAttr.Z
+// LINUX: ret void ()* @TwoVersionsSameAttr.S
+// LINUX: ret void ()* @TwoVersionsSameAttr.O
+// LINUX: call void @llvm.trap
+// LINUX: unreachable
+
+// WINDOWS: define dso_local void @TwoVersionsSameAttr()
+// WINDOWS: call void @TwoVersionsSameAttr.Z
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @TwoVersionsSameAttr.S
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @TwoVersionsSameAttr.O
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @llvm.trap
+// WINDOWS: unreachable
+
+ATTR(cpu_dispatch(atom, ivybridge, knl))
 void ThreeVersionsSameAttr(void){}
-// CHECK: define void ()* @ThreeVersionsSameAttr.resolver()
-// CHECK: call void @__cpu_indicator_init
-// CHECK: ret void ()* @ThreeVersionsSameAttr.Z
-// CHECK: ret void ()* @ThreeVersionsSameAttr.S
-// CHECK: ret void ()* @ThreeVersionsSameAttr.O
-// CHECK: call void @llvm.trap
-// CHECK: unreachable
+// LINUX: define void ()* @ThreeVersionsSameAttr.resolver()
+// LINUX: call void @__cpu_indicator_init
+// LINUX: ret void ()* @ThreeVersionsSameAttr.Z
+// LINUX: ret void ()* @ThreeVersionsSameAttr.S
+// LINUX: ret void ()* @ThreeVersionsSameAttr.O
+// LINUX: call void @llvm.trap
+// LINUX: unreachable
+
+// WINDOWS: define dso_local void @ThreeVersionsSameAttr()
+// WINDOWS: call void @__cpu_indicator_init
+// WINDOWS: call void @ThreeVersionsSameAttr.Z
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @ThreeVersionsSameAttr.S
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @ThreeVersionsSameAttr.O
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @llvm.trap
+// WINDOWS: unreachable
 
 // No Cpu Specific options.
-__attribute__((cpu_dispatch(atom, ivybridge, knl)))
+ATTR(cpu_dispatch(atom, ivybridge, knl))
 void NoSpecifics(void);
-// CHECK: define void ()* @NoSpecifics.resolver()
-// CHECK: call void @__cpu_indicator_init
-// CHECK: ret void ()* @NoSpecifics.Z
-// CHECK: ret void ()* @NoSpecifics.S
-// CHECK: ret void ()* @NoSpecifics.O
-// CHECK: call void @llvm.trap
-// CHECK: unreachable
-
-__attribute__((cpu_dispatch(atom, generic, ivybridge, knl)))
+// LINUX: define void ()* @NoSpecifics.resolver()
+// LINUX: call void @__cpu_indicator_init
+// LINUX: ret void ()* @NoSpecifics.Z
+// LINUX: ret void ()* @NoSpecifics.S
+// LINUX: ret void ()* @NoSpecifics.O
+// LINUX: call void @llvm.trap
+// LINUX: unreachable
+
+// WINDOWS: define dso_local void @NoSpecifics()
+// WINDOWS: call void @__cpu_indicator_init
+// WINDOWS: call void @NoSpecifics.Z
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @NoSpecifics.S
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @NoSpecifics.O
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @llvm.trap
+// WINDOWS: unreachable
+
+ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
 void HasGeneric(void);
-// CHECK: define void ()* @HasGeneric.resolver()
-// CHECK: call void @__cpu_indicator_init
-// CHECK: ret void ()* @HasGeneric.Z
-// CHECK: ret void ()* @HasGeneric.S
-// CHECK: ret void ()* @HasGeneric.O
-// CHECK: ret void ()* @HasGeneric.A
-// CHECK-NOT: call void @llvm.trap
+// LINUX: define void ()* @HasGeneric.resolver()
+// LINUX: call void @__cpu_indicator_init
+// LINUX: ret void ()* @HasGeneric.Z
+// LINUX: ret void ()* @HasGeneric.S
+// LINUX: ret void ()* @HasGeneric.O
+// LINUX: ret void ()* @HasGeneric.A
+// LINUX-NOT: call void @llvm.trap
+
+// WINDOWS: define dso_local void @HasGeneric()
+// WINDOWS: call void @__cpu_indicator_init
+// WINDOWS: call void @HasGeneric.Z
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @HasGeneric.S
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @HasGeneric.O
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @HasGeneric.A
+// WINDOWS-NEXT: ret void
+// WINDOWS-NOT: call void @llvm.trap
+
+ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
+void HasParams(int i, double d);
+// LINUX: define void (i32, double)* @HasParams.resolver()
+// LINUX: call void @__cpu_indicator_init
+// LINUX: ret void (i32, double)* @HasParams.Z
+// LINUX: ret void (i32, double)* @HasParams.S
+// LINUX: ret void (i32, double)* @HasParams.O
+// LINUX: ret void (i32, double)* @HasParams.A
+// LINUX-NOT: call void @llvm.trap
+
+// WINDOWS: define dso_local void @HasParams(i32, double)
+// WINDOWS: call void @__cpu_indicator_init
+// WINDOWS: call void @HasParams.Z(i32 %0, double %1)
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @HasParams.S(i32 %0, double %1)
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @HasParams.O(i32 %0, double %1)
+// WINDOWS-NEXT: ret void
+// WINDOWS: call void @HasParams.A(i32 %0, double %1)
+// WINDOWS-NEXT: ret void
+// WINDOWS-NOT: call void @llvm.trap
+
+ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
+int HasParamsAndReturn(int i, double d);
+// LINUX: define i32 (i32, double)* @HasParamsAndReturn.resolver()
+// LINUX: call void @__cpu_indicator_init
+// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.Z
+// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.S
+// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.O
+// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.A
+// LINUX-NOT: call void @llvm.trap
+
+// WINDOWS: define dso_local i32 @HasParamsAndReturn(i32, double)
+// WINDOWS: call void @__cpu_indicator_init
+// WINDOWS: %[[RET:.+]] = call i32 @HasParamsAndReturn.Z(i32 %0, double %1)
+// WINDOWS-NEXT: ret i32 %[[RET]]
+// WINDOWS: %[[RET:.+]] = call i32 @HasParamsAndReturn.S(i32 %0, double %1)
+// WINDOWS-NEXT: ret i32 %[[RET]]
+// WINDOWS: %[[RET:.+]] = call i32 @HasParamsAndReturn.O(i32 %0, double %1)
+// WINDOWS-NEXT: ret i32 %[[RET]]
+// WINDOWS: %[[RET:.+]] = call i32 @HasParamsAndReturn.A(i32 %0, double %1)
+// WINDOWS-NEXT: ret i32 %[[RET]]
+// WINDOWS-NOT: call void @llvm.trap
 
 // CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
 // CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
Index: lib/CodeGen/CodeGenModule.h
===================================================================
--- lib/CodeGen/CodeGenModule.h
+++ lib/CodeGen/CodeGenModule.h
@@ -1293,9 +1293,9 @@
       llvm::AttributeList ExtraAttrs = llvm::AttributeList(),
       ForDefinition_t IsForDefinition = NotForDefinition);
 
-  llvm::Constant *GetOrCreateMultiVersionIFunc(GlobalDecl GD,
-                                               llvm::Type *DeclTy,
-                                               const FunctionDecl *FD);
+  llvm::Constant *GetOrCreateMultiVersionResolver(GlobalDecl GD,
+                                                  llvm::Type *DeclTy,
+                                                  const FunctionDecl *FD);
   void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD);
 
   llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName,
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -892,10 +892,11 @@
 static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM,
                                                  const CPUSpecificAttr *Attr,
                                                  raw_ostream &Out) {
-  // cpu_specific gets the current name, dispatch gets the resolver.
+  // cpu_specific gets the current name, dispatch gets the resolver if IFunc is
+  // supported.
   if (Attr)
     Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName());
-  else
+  else if (CGM.getTarget().supportsIFunc())
     Out << ".resolver";
 }
 
@@ -2507,13 +2508,19 @@
                                TA->getArchitecture(), Feats);
         });
 
-    llvm::Function *ResolverFunc = cast<llvm::Function>(
-        GetGlobalValue((getMangledName(GD) + ".resolver").str()));
+    llvm::Function *ResolverFunc;
+    const TargetInfo &TI = getTarget();
+
+    if (TI.supportsIFunc() || FD->isTargetMultiVersion())
+      ResolverFunc = cast<llvm::Function>(
+          GetGlobalValue((getMangledName(GD) + ".resolver").str()));
+    else
+      ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD)));
+
     if (supportsCOMDAT())
       ResolverFunc->setComdat(
           getModule().getOrInsertComdat(ResolverFunc->getName()));
 
-    const TargetInfo &TI = getTarget();
     std::stable_sort(
         Options.begin(), Options.end(),
         [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS,
@@ -2532,14 +2539,23 @@
   assert(DD && "Not a cpu_dispatch Function?");
   llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType());
 
+
   StringRef ResolverName = getMangledName(GD);
-  llvm::Type *ResolverType = llvm::FunctionType::get(
-      llvm::PointerType::get(DeclTy,
-                             Context.getTargetAddressSpace(FD->getType())),
-      false);
-  auto *ResolverFunc = cast<llvm::Function>(
-      GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{},
-                              /*ForVTable=*/false));
+
+  llvm::Type *ResolverType;
+  GlobalDecl ResolverGD;
+  if (getTarget().supportsIFunc())
+    ResolverType = llvm::FunctionType::get(
+        llvm::PointerType::get(DeclTy,
+                               Context.getTargetAddressSpace(FD->getType())),
+        false);
+  else {
+    ResolverType = DeclTy;
+    ResolverGD = GD;
+  }
+
+  auto *ResolverFunc = cast<llvm::Function>(GetOrCreateLLVMFunction(
+      ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false));
 
   SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
   const TargetInfo &Target = getTarget();
@@ -2571,37 +2587,53 @@
   CGF.EmitMultiVersionResolver(ResolverFunc, Options);
 }
 
-/// If an ifunc for the specified mangled name is not in the module, create and
-/// return an llvm IFunc Function with the specified type.
-llvm::Constant *
-CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy,
-                                            const FunctionDecl *FD) {
+/// If a dispatcher for the specified mangled name is not in the module, create
+/// and return one of the specified type (an ifunc if the target supports it).
+llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(
+    GlobalDecl GD, llvm::Type *DeclTy, const FunctionDecl *FD) {
   std::string MangledName =
       getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true);
-  std::string IFuncName = MangledName + ".ifunc";
-  if (llvm::GlobalValue *IFuncGV = GetGlobalValue(IFuncName))
-    return IFuncGV;
+
+  // Holds the name of the resolver. In ifunc mode this is the ifunc (which has
+  // a separate resolver).
+  std::string ResolverName = MangledName;
+  if (getTarget().supportsIFunc())
+    ResolverName += ".ifunc";
+  else if (FD->isTargetMultiVersion())
+    ResolverName += ".resolver";
+
+  // If this already exists, just return that one.
+  if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName))
+    return ResolverGV;
 
   // Since this is the first time we've created this IFunc, make sure
   // that we put this multiversioned function into the list to be
   // replaced later if necessary (target multiversioning only).
   if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion())
     MultiVersionFuncs.push_back(GD);
 
-  std::string ResolverName = MangledName + ".resolver";
-  llvm::Type *ResolverType = llvm::FunctionType::get(
-      llvm::PointerType::get(DeclTy,
-                             Context.getTargetAddressSpace(FD->getType())),
-      false);
-  llvm::Constant *Resolver =
-      GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{},
-                              /*ForVTable=*/false);
-  llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(
-      DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule());
-  GIF->setName(IFuncName);
-  SetCommonAttributes(FD, GIF);
+  if (getTarget().supportsIFunc()) {
+    llvm::Type *ResolverType = llvm::FunctionType::get(
+        llvm::PointerType::get(
+            DeclTy, getContext().getTargetAddressSpace(FD->getType())),
+        false);
+    llvm::Constant *Resolver = GetOrCreateLLVMFunction(
+        MangledName + ".resolver", ResolverType, GlobalDecl{},
+        /*ForVTable=*/false);
+    llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(
+        DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule());
+    GIF->setName(ResolverName);
+    SetCommonAttributes(FD, GIF);
+
+    return GIF;
+  }
 
-  return GIF;
+  llvm::Constant *Resolver = GetOrCreateLLVMFunction(
+      ResolverName, DeclTy, GlobalDecl{}, /*ForVTable=*/false);
+  assert(isa<llvm::GlobalValue>(Resolver) &&
+         "Resolver should be created for the first time");
+  SetCommonAttributes(FD, cast<llvm::GlobalValue>(Resolver));
+  return Resolver;
 }
 
 /// GetOrCreateLLVMFunction - If the specified mangled name is not in the
@@ -2641,7 +2673,7 @@
       if (TA && TA->isDefaultVersion())
         UpdateMultiVersionNames(GD, FD);
       if (!IsForDefinition)
-        return GetOrCreateMultiVersionIFunc(GD, Ty, FD);
+        return GetOrCreateMultiVersionResolver(GD, Ty, FD);
     }
   }
 
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -2377,13 +2377,39 @@
   return Condition;
 }
 
+/// Return FuncToReturn itself (IFunc) or forward the resolver's call to it.
+template <typename BuilderTy>
+static void CreateMultiVersionResolverReturn(llvm::Function *Resolver,
+                                             BuilderTy &Builder,
+                                             llvm::Function *FuncToReturn,
+                                             bool SupportsIFunc) {
+  if (SupportsIFunc) {
+    Builder.CreateRet(FuncToReturn);
+    return;
+  }
+
+  // No IFunc: the resolver is the dispatcher, so pass its own arguments on.
+  llvm::SmallVector<llvm::Value *, 10> Args;
+  for (llvm::Argument &Arg : Resolver->args())
+    Args.push_back(&Arg);
+  llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args);
+
+  if (Resolver->getReturnType()->isVoidTy())
+    Builder.CreateRetVoid();
+  else
+    Builder.CreateRet(Result);
+}
+
 void CodeGenFunction::EmitMultiVersionResolver(
     llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) {
   assert((getContext().getTargetInfo().getTriple().getArch() ==
               llvm::Triple::x86 ||
           getContext().getTargetInfo().getTriple().getArch() ==
               llvm::Triple::x86_64) &&
          "Only implemented for x86 targets");
+
+  bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc();
+
   // Main function's basic block.
   llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver);
   Builder.SetInsertPoint(CurBlock);
@@ -2397,13 +2423,15 @@
     if (!Condition) {
       assert(&RO == Options.end() - 1 &&
              "Default or Generic case must be last");
-      Builder.CreateRet(RO.Function);
+      CreateMultiVersionResolverReturn(Resolver, Builder, RO.Function,
+                                       SupportsIFunc);
       return;
     }
 
     llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver);
     llvm::IRBuilder<> RetBuilder(RetBlock);
-    RetBuilder.CreateRet(RO.Function);
+      CreateMultiVersionResolverReturn(Resolver, RetBuilder, RO.Function,
+                                       SupportsIFunc);
     CurBlock = createBasicBlock("resolver_else", Resolver);
     Builder.CreateCondBr(Condition, RetBlock, CurBlock);
   }
Index: lib/Basic/Targets/X86.h
===================================================================
--- lib/Basic/Targets/X86.h
+++ lib/Basic/Targets/X86.h
@@ -290,9 +290,6 @@
     return checkCPUKind(CPU = getCPUKind(Name));
   }
 
-  bool supportsMultiVersioning() const override {
-    return getTriple().isOSBinFormatELF();
-  }
   unsigned multiVersionSortPriority(StringRef Name) const override;
 
   bool setFPMath(StringRef Name) override;
Index: lib/AST/Decl.cpp
===================================================================
--- lib/AST/Decl.cpp
+++ lib/AST/Decl.cpp
@@ -2947,6 +2947,10 @@
   return isMultiVersion() && hasAttr<CPUSpecificAttr>();
 }
 
+bool FunctionDecl::isTargetMultiVersion() const {
+  return isMultiVersion() && hasAttr<TargetAttr>();
+}
+
 void
 FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) {
   redeclarable_base::setPreviousDecl(PrevDecl);
Index: include/clang/Basic/TargetInfo.h
===================================================================
--- include/clang/Basic/TargetInfo.h
+++ include/clang/Basic/TargetInfo.h
@@ -1082,9 +1082,15 @@
     return false;
   }
 
-  /// Identify whether this taret supports multiversioning of functions,
+  /// Identify whether this target supports multiversioning of functions,
   /// which requires support for cpu_supports and cpu_is functionality.
-  virtual bool supportsMultiVersioning() const { return false; }
+  bool supportsMultiVersioning() const {
+    return getTriple().getArch() == llvm::Triple::x86 ||
+           getTriple().getArch() == llvm::Triple::x86_64;
+  }
+
+  /// Identify whether this target supports IFuncs.
+  bool supportsIFunc() const { return getTriple().isOSBinFormatELF(); }
 
   // Validate the contents of the __builtin_cpu_supports(const char*)
   // argument.
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -858,7 +858,7 @@
 }
 
 def CPUSpecific : InheritableAttr {
-  let Spellings = [Clang<"cpu_specific">];
+  let Spellings = [Clang<"cpu_specific">, Declspec<"cpu_specific">];
   let Args = [VariadicIdentifierArgument<"Cpus">];
   let Subjects = SubjectList<[Function]>;
   let Documentation = [CPUSpecificCPUDispatchDocs];
@@ -872,7 +872,7 @@
 }
 
 def CPUDispatch : InheritableAttr {
-  let Spellings = [Clang<"cpu_dispatch">];
+  let Spellings = [Clang<"cpu_dispatch">, Declspec<"cpu_dispatch">];
   let Args = [VariadicIdentifierArgument<"Cpus">];
   let Subjects = SubjectList<[Function]>;
   let Documentation = [CPUSpecificCPUDispatchDocs];
Index: include/clang/AST/Decl.h
===================================================================
--- include/clang/AST/Decl.h
+++ include/clang/AST/Decl.h
@@ -2233,6 +2233,10 @@
   /// part of the cpu_specific/cpu_dispatch functionality.
   bool isCPUSpecificMultiVersion() const;
 
+  /// True if this function is a multiversioned dispatch function as a part of
+  /// the target functionality.
+  bool isTargetMultiVersion() const;
+
   void setPreviousDeclaration(FunctionDecl * PrevDecl);
 
   FunctionDecl *getCanonicalDecl() override;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to