[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-25 Thread Erich Keane via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL345298: Implement Function Multiversioning for Non-ELF 
Systems. (authored by erichkeane, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D53586?vs=171118&id=171160#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D53586

Files:
  cfe/trunk/include/clang/AST/Decl.h
  cfe/trunk/include/clang/Basic/Attr.td
  cfe/trunk/include/clang/Basic/TargetInfo.h
  cfe/trunk/lib/AST/Decl.cpp
  cfe/trunk/lib/Basic/Targets/X86.h
  cfe/trunk/lib/CodeGen/CodeGenFunction.cpp
  cfe/trunk/lib/CodeGen/CodeGenFunction.h
  cfe/trunk/lib/CodeGen/CodeGenModule.cpp
  cfe/trunk/lib/CodeGen/CodeGenModule.h
  cfe/trunk/test/CodeGen/attr-cpuspecific.c
  cfe/trunk/test/CodeGen/attr-target-mv-func-ptrs.c
  cfe/trunk/test/CodeGen/attr-target-mv-va-args.c
  cfe/trunk/test/CodeGen/attr-target-mv.c
  cfe/trunk/test/CodeGenCXX/attr-target-mv-diff-ns.cpp
  cfe/trunk/test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
  cfe/trunk/test/CodeGenCXX/attr-target-mv-inalloca.cpp
  cfe/trunk/test/CodeGenCXX/attr-target-mv-member-funcs.cpp
  cfe/trunk/test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
  cfe/trunk/test/CodeGenCXX/attr-target-mv-overloads.cpp
  cfe/trunk/test/Sema/attr-target-mv-bad-target.c

Index: cfe/trunk/include/clang/AST/Decl.h
===
--- cfe/trunk/include/clang/AST/Decl.h
+++ cfe/trunk/include/clang/AST/Decl.h
@@ -2233,6 +2233,10 @@
   /// part of the cpu_specific/cpu_dispatch functionality.
   bool isCPUSpecificMultiVersion() const;
 
+  /// True if this function is a multiversioned dispatch function as a part of
+  /// the target functionality.
+  bool isTargetMultiVersion() const;
+
   void setPreviousDeclaration(FunctionDecl * PrevDecl);
 
   FunctionDecl *getCanonicalDecl() override;
Index: cfe/trunk/include/clang/Basic/TargetInfo.h
===
--- cfe/trunk/include/clang/Basic/TargetInfo.h
+++ cfe/trunk/include/clang/Basic/TargetInfo.h
@@ -1082,9 +1082,15 @@
 return false;
   }
 
-  /// Identify whether this taret supports multiversioning of functions,
+  /// Identify whether this target supports multiversioning of functions,
   /// which requires support for cpu_supports and cpu_is functionality.
-  virtual bool supportsMultiVersioning() const { return false; }
+  bool supportsMultiVersioning() const {
+return getTriple().getArch() == llvm::Triple::x86 ||
+   getTriple().getArch() == llvm::Triple::x86_64;
+  }
+
+  /// Identify whether this target supports IFuncs.
+  bool supportsIFunc() const { return getTriple().isOSBinFormatELF(); }
 
   // Validate the contents of the __builtin_cpu_supports(const char*)
   // argument.
Index: cfe/trunk/include/clang/Basic/Attr.td
===
--- cfe/trunk/include/clang/Basic/Attr.td
+++ cfe/trunk/include/clang/Basic/Attr.td
@@ -858,7 +858,7 @@
 }
 
 def CPUSpecific : InheritableAttr {
-  let Spellings = [Clang<"cpu_specific">];
+  let Spellings = [Clang<"cpu_specific">, Declspec<"cpu_specific">];
   let Args = [VariadicIdentifierArgument<"Cpus">];
   let Subjects = SubjectList<[Function]>;
   let Documentation = [CPUSpecificCPUDispatchDocs];
@@ -872,7 +872,7 @@
 }
 
 def CPUDispatch : InheritableAttr {
-  let Spellings = [Clang<"cpu_dispatch">];
+  let Spellings = [Clang<"cpu_dispatch">, Declspec<"cpu_dispatch">];
   let Args = [VariadicIdentifierArgument<"Cpus">];
   let Subjects = SubjectList<[Function]>;
   let Documentation = [CPUSpecificCPUDispatchDocs];
Index: cfe/trunk/test/CodeGen/attr-target-mv-va-args.c
===
--- cfe/trunk/test/CodeGen/attr-target-mv-va-args.c
+++ cfe/trunk/test/CodeGen/attr-target-mv-va-args.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 int __attribute__((target("sse4.2"))) foo(int i, ...) { return 0; }
 int __attribute__((target("arch=sandybridge"))) foo(int i, ...);
 int __attribute__((target("arch=ivybridge"))) foo(int i, ...) {return 1;}
@@ -8,19 +9,37 @@
   return foo(1, 'a', 1.1) + foo(2, 2.2, "asdf");
 }
 
-// CHECK: @foo.ifunc = ifunc i32 (i32, ...), i32 (i32, ...)* ()* @foo.resolver
-// CHECK: define i32 @foo.sse4.2(i32 %i, ...)
-// CHECK: ret i32 0
-// CHECK: define i32 @foo.arch_ivybridge(i32 %i, ...)
-// CHECK: ret i32 1
-// CHECK: define i32 @foo(i32 %i, ...)
-// CHECK: ret i32 2
-// CHECK: define i32 @bar()
-// CHECK: call i32 (i32, ...) @foo.ifunc(i32 1, i32 97, double
-// CHECK: call i32 (i32, ...) @foo.ifunc(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds 
-// CHECK: define i32 (i32, ...

[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-25 Thread Erich Keane via Phabricator via cfe-commits
erichkeane added a comment.

In https://reviews.llvm.org/D53586#1276198, @rnk wrote:

> Here's a thought. What happens if different TUs observe different overload 
> sets, perhaps because of ifdefs? Different TUs will generate different 
> resolvers, but they won't dispatch to the same sets of targets. I'm guessing 
> we'd treat that as an ODR violation, no diagnostic required?
>
> Anyway, I think this is ready to commit. Thanks!


Yep, pretty much. Its a bit of a weakness of the 'target' mv (since dispatch 
requires you to explicitly list), but I believe it is a trade off that gcc felt 
was acceptable.x`x`


https://reviews.llvm.org/D53586



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-25 Thread Reid Kleckner via Phabricator via cfe-commits
rnk accepted this revision.
rnk added a comment.

Here's a thought. What happens if different TUs observe different overload 
sets, perhaps because of ifdefs? Different TUs will generate different 
resolvers, but they won't dispatch to the same sets of targets. I'm guessing 
we'd treat that as an ODR violation, no diagnostic required?

Anyway, I think this is ready to commit. Thanks!




Comment at: test/CodeGen/attr-target-mv-va-args.c:41-44
+// WINDOWS: call i32 (i32, ...) @foo.arch_sandybridge
+// WINDOWS: call i32 (i32, ...) @foo.arch_ivybridge
+// WINDOWS: call i32 (i32, ...) @foo.sse4.2
+// WINDOWS: call i32 (i32, ...) @foo

Probably worth matching out the musttail for variadics, since the ellipsis 
doesn't make sense without it.



Comment at: test/CodeGenCXX/attr-target-mv-inalloca.cpp:45-47
+// WINDOWS: musttail call i32 @"?bar@@YAHUFoo@@@Z.arch_ivybridge"(<{ 
%struct.Foo }>* %0)
+// WINDOWS: musttail call i32 @"?bar@@YAHUFoo@@@Z.sse4.2"(<{ %struct.Foo }>* 
%0)
+// WINDOWS: musttail call i32 @"?bar@@YAHUFoo@@@Z"(<{ %struct.Foo }>* %0)

I think it's worth matching the retval and doing CHECK-NEXT for the ret, since 
that will check that the call is in fact in tail position, which is a verifier 
requirement for musttail.


https://reviews.llvm.org/D53586



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-25 Thread Erich Keane via Phabricator via cfe-commits
erichkeane updated this revision to Diff 171118.
erichkeane added a comment.

ACTUALLY add the test this time :/  sorry about that!


https://reviews.llvm.org/D53586

Files:
  include/clang/AST/Decl.h
  include/clang/Basic/Attr.td
  include/clang/Basic/TargetInfo.h
  lib/AST/Decl.cpp
  lib/Basic/Targets/X86.h
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenFunction.h
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenModule.h
  test/CodeGen/attr-cpuspecific.c
  test/CodeGen/attr-target-mv-func-ptrs.c
  test/CodeGen/attr-target-mv-va-args.c
  test/CodeGen/attr-target-mv.c
  test/CodeGenCXX/attr-target-mv-diff-ns.cpp
  test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
  test/CodeGenCXX/attr-target-mv-inalloca.cpp
  test/CodeGenCXX/attr-target-mv-member-funcs.cpp
  test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
  test/CodeGenCXX/attr-target-mv-overloads.cpp
  test/Sema/attr-target-mv-bad-target.c

Index: test/Sema/attr-target-mv-bad-target.c
===
--- test/Sema/attr-target-mv-bad-target.c
+++ test/Sema/attr-target-mv-bad-target.c
@@ -1,4 +1,3 @@
-// RUN: %clang_cc1 -triple x86_64-windows-pc  -fsyntax-only -verify %s
 // RUN: %clang_cc1 -triple arm-none-eabi  -fsyntax-only -verify %s
 
 int __attribute__((target("sse4.2"))) redecl1(void) { return 1; }
Index: test/CodeGenCXX/attr-target-mv-overloads.cpp
===
--- test/CodeGenCXX/attr-target-mv-overloads.cpp
+++ test/CodeGenCXX/attr-target-mv-overloads.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 
 int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; }
 int __attribute__((target("arch=sandybridge"))) foo_overload(int);
@@ -13,38 +14,69 @@
   return foo_overload() + foo_overload(1);
 }
 
-// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
-// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver
-
-
-// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32)
-// CHECK: ret i32 0
-// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
-// CHECK: ret i32 1
-// CHECK: define i32 @_Z12foo_overloadi(i32)
-// CHECK: ret i32 2
-// CHECK: define i32 @_Z12foo_overloadv.sse4.2()
-// CHECK: ret i32 0
-// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge()
-// CHECK: ret i32 1
-// CHECK: define i32 @_Z12foo_overloadv()
-// CHECK: ret i32 2
-
-// CHECK: define i32 @_Z4bar2v()
-// CHECK: call i32 @_Z12foo_overloadv.ifunc()
-// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1)
-
-// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat
-// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge
-// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge
-// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2
-// CHECK: ret i32 ()* @_Z12foo_overloadv
-
-// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi
-
-// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge()
-// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32)
+// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
+// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver
+
+// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32)
+// LINUX: ret i32 0
+// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
+// LINUX: ret i32 1
+// LINUX: define i32 @_Z12foo_overloadi(i32)
+// LINUX: ret i32 2
+// LINUX: define i32 @_Z12foo_overloadv.sse4.2()
+// LINUX: ret i32 0
+// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge()
+// LINUX: ret i32 1
+// LINUX: define i32 @_Z12foo_overloadv()
+// LINUX: ret i32 2
+
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.sse4.2"(i32)
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"(i32)
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z"(i32)
+// WINDOWS: ret i32 2
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.sse4.2"()
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.arch_ivybridge"()
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ"()
+// WINDOWS: ret i32 2
+
+// LINUX: define i32 @_Z4bar2v()
+// LINUX: call i32 @_Z12foo_overloadv.ifunc()
+// LINUX: call i32 @_Z12foo_overloadi.ifunc(i32 1)
+
+// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"()
+// WINDOWS: call i32 @"?foo_overload@@YAHXZ.resolver"()
+// WINDOWS: call 

[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-25 Thread Erich Keane via Phabricator via cfe-commits
erichkeane updated this revision to Diff 171088.
erichkeane marked an inline comment as done.
erichkeane added a comment.

Added test as requested by @rnk.  How's it look?  I hope I got the balance of 
check-lines right.


https://reviews.llvm.org/D53586

Files:
  include/clang/AST/Decl.h
  include/clang/Basic/Attr.td
  include/clang/Basic/TargetInfo.h
  lib/AST/Decl.cpp
  lib/Basic/Targets/X86.h
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenFunction.h
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenModule.h
  test/CodeGen/attr-cpuspecific.c
  test/CodeGen/attr-target-mv-func-ptrs.c
  test/CodeGen/attr-target-mv-va-args.c
  test/CodeGen/attr-target-mv.c
  test/CodeGenCXX/attr-target-mv-diff-ns.cpp
  test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
  test/CodeGenCXX/attr-target-mv-member-funcs.cpp
  test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
  test/CodeGenCXX/attr-target-mv-overloads.cpp
  test/Sema/attr-target-mv-bad-target.c

Index: test/Sema/attr-target-mv-bad-target.c
===
--- test/Sema/attr-target-mv-bad-target.c
+++ test/Sema/attr-target-mv-bad-target.c
@@ -1,4 +1,3 @@
-// RUN: %clang_cc1 -triple x86_64-windows-pc  -fsyntax-only -verify %s
 // RUN: %clang_cc1 -triple arm-none-eabi  -fsyntax-only -verify %s
 
 int __attribute__((target("sse4.2"))) redecl1(void) { return 1; }
Index: test/CodeGenCXX/attr-target-mv-overloads.cpp
===
--- test/CodeGenCXX/attr-target-mv-overloads.cpp
+++ test/CodeGenCXX/attr-target-mv-overloads.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 
 int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; }
 int __attribute__((target("arch=sandybridge"))) foo_overload(int);
@@ -13,38 +14,69 @@
   return foo_overload() + foo_overload(1);
 }
 
-// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
-// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver
-
-
-// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32)
-// CHECK: ret i32 0
-// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
-// CHECK: ret i32 1
-// CHECK: define i32 @_Z12foo_overloadi(i32)
-// CHECK: ret i32 2
-// CHECK: define i32 @_Z12foo_overloadv.sse4.2()
-// CHECK: ret i32 0
-// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge()
-// CHECK: ret i32 1
-// CHECK: define i32 @_Z12foo_overloadv()
-// CHECK: ret i32 2
-
-// CHECK: define i32 @_Z4bar2v()
-// CHECK: call i32 @_Z12foo_overloadv.ifunc()
-// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1)
-
-// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat
-// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge
-// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge
-// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2
-// CHECK: ret i32 ()* @_Z12foo_overloadv
-
-// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi
-
-// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge()
-// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32)
+// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
+// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver
+
+// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32)
+// LINUX: ret i32 0
+// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
+// LINUX: ret i32 1
+// LINUX: define i32 @_Z12foo_overloadi(i32)
+// LINUX: ret i32 2
+// LINUX: define i32 @_Z12foo_overloadv.sse4.2()
+// LINUX: ret i32 0
+// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge()
+// LINUX: ret i32 1
+// LINUX: define i32 @_Z12foo_overloadv()
+// LINUX: ret i32 2
+
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.sse4.2"(i32)
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"(i32)
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z"(i32)
+// WINDOWS: ret i32 2
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.sse4.2"()
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.arch_ivybridge"()
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ"()
+// WINDOWS: ret i32 2
+
+// LINUX: define i32 @_Z4bar2v()
+// LINUX: call i32 @_Z12foo_overloadv.ifunc()
+// LINUX: call i32 @_Z12foo_overloadi.ifunc(i32 1)
+
+// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"()
+// WINDOWS: call i32 @"?foo_ove

[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-24 Thread Reid Kleckner via Phabricator via cfe-commits
rnk added inline comments.



Comment at: lib/CodeGen/CodeGenFunction.cpp:2395
+
+  llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args);
+

rnk wrote:
> erichkeane wrote:
> > erichkeane wrote:
> > > rnk wrote:
> > > > This approach is... not going to work in the general case. Consider, 
> > > > for example, varargs. Then consider 32-bit x86 inalloca, which is quite 
> > > > widespread. Any non-trivially copyable object is not going to be 
> > > > passable through such a thunk. You might consider looking at 
> > > > CodeGenFunction::EmitMustTailThunk instead.
> > > Oh dear... 
> > > I'm unfamiliar with EmitMustTailThunk, is it self explanatory?  Any 
> > > chance you can expound?  Should I call/use that function, or copy out of 
> > > it?
> > I looked through that function, and it seems to do very similar things to 
> > this...
> > 
> > It FIRST does the small-vector conversion to a value* array like I do.
> > 
> > Second, it 'adjusts' the 'this' parameter.  This doesn't seem like it needs 
> > to happen, because the type isn't changing, right?  my 'this' pointer is 
> > still pass-on-able.
> > 
> > Third, it creates the call, then marks it TCK_MustTail.
> > 
> > Forth, it sets the attributes/calling convention of the 'call' object, but 
> > I'd expect that to come from the llvm::Function object in the 'CreateCall', 
> > right?  I can add them after the fact I guess, if we see value.*(see below)
> > 
> > Finally, it does the CreateRetVoid/CreateRet like this function does below. 
> > 
> > *I DO have a test that claims that my IR is broken if I set the call 
> > attributes.  Curiously only 1 of my tests, but the error is:
> > "cannot guarantee tail call due to mismatched ABI impacting function 
> > attributes"
> > 
> > The only difference in attributes is target-features and target-cpu though, 
> > so I don't know what causes this.
> Right, the this-adjustment is definitely unnecessary for this kind of CPU 
> dispatch thunk.
> 
> The important thing is that marking the call as `musttail` ensures that you 
> get perfect forwarding of all arguments, even varargs and inalloca. Maybe 
> it's enough to just mark the call you are creating that way. I think the code 
> you are generating obeys all the musttail invariants, so that might be best.
> 
> The "cannot guarantee tail call due to mismatched ABI impacting function 
> attributes" verifier check is only supposed to fire when parameter attributes 
> like `byval` and `inreg` differ between the call site and the dispatcher 
> function. It shouldn't care about target-cpu or target-features.
It's probably worth adding a test that uses inalloca, since that's why we're 
doing this nonsense. It would look like:
```
struct Foo {
  Foo();
  Foo(const Foo &o);
  ~Foo();
  int x;
};
int __attribute__((target("default"))) bar(Foo o) { return o.x; }
int __attribute__((target("sse4.2"))) bar(Foo o) { return o.x + 1; }
int __attribute__((target("arch=ivybridge"))) bar(Foo o) { return o.x + 2; }
```

Targetting i686-windows-msvc.


https://reviews.llvm.org/D53586



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-24 Thread Eric Christopher via Phabricator via cfe-commits
echristo accepted this revision.
echristo added a comment.
This revision is now accepted and ready to land.

All of the target specific stuff looks fine to me. I'm going to defer to rnk 
about the windows side of things and aaron for the attributes.


https://reviews.llvm.org/D53586



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-24 Thread Erich Keane via Phabricator via cfe-commits
erichkeane updated this revision to Diff 170987.
erichkeane added a comment.

did everything suggested by @rnk as far as I know.  Thanks again for the 
reviews!


https://reviews.llvm.org/D53586

Files:
  include/clang/AST/Decl.h
  include/clang/Basic/Attr.td
  include/clang/Basic/TargetInfo.h
  lib/AST/Decl.cpp
  lib/Basic/Targets/X86.h
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenFunction.h
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenModule.h
  test/CodeGen/attr-cpuspecific.c
  test/CodeGen/attr-target-mv-func-ptrs.c
  test/CodeGen/attr-target-mv-va-args.c
  test/CodeGen/attr-target-mv.c
  test/CodeGenCXX/attr-target-mv-diff-ns.cpp
  test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
  test/CodeGenCXX/attr-target-mv-member-funcs.cpp
  test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
  test/CodeGenCXX/attr-target-mv-overloads.cpp
  test/Sema/attr-target-mv-bad-target.c

Index: test/Sema/attr-target-mv-bad-target.c
===
--- test/Sema/attr-target-mv-bad-target.c
+++ test/Sema/attr-target-mv-bad-target.c
@@ -1,4 +1,3 @@
-// RUN: %clang_cc1 -triple x86_64-windows-pc  -fsyntax-only -verify %s
 // RUN: %clang_cc1 -triple arm-none-eabi  -fsyntax-only -verify %s
 
 int __attribute__((target("sse4.2"))) redecl1(void) { return 1; }
Index: test/CodeGenCXX/attr-target-mv-overloads.cpp
===
--- test/CodeGenCXX/attr-target-mv-overloads.cpp
+++ test/CodeGenCXX/attr-target-mv-overloads.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 
 int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; }
 int __attribute__((target("arch=sandybridge"))) foo_overload(int);
@@ -13,38 +14,69 @@
   return foo_overload() + foo_overload(1);
 }
 
-// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
-// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver
-
-
-// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32)
-// CHECK: ret i32 0
-// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
-// CHECK: ret i32 1
-// CHECK: define i32 @_Z12foo_overloadi(i32)
-// CHECK: ret i32 2
-// CHECK: define i32 @_Z12foo_overloadv.sse4.2()
-// CHECK: ret i32 0
-// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge()
-// CHECK: ret i32 1
-// CHECK: define i32 @_Z12foo_overloadv()
-// CHECK: ret i32 2
-
-// CHECK: define i32 @_Z4bar2v()
-// CHECK: call i32 @_Z12foo_overloadv.ifunc()
-// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1)
-
-// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat
-// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge
-// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge
-// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2
-// CHECK: ret i32 ()* @_Z12foo_overloadv
-
-// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi
-
-// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge()
-// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32)
+// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
+// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver
+
+// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32)
+// LINUX: ret i32 0
+// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
+// LINUX: ret i32 1
+// LINUX: define i32 @_Z12foo_overloadi(i32)
+// LINUX: ret i32 2
+// LINUX: define i32 @_Z12foo_overloadv.sse4.2()
+// LINUX: ret i32 0
+// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge()
+// LINUX: ret i32 1
+// LINUX: define i32 @_Z12foo_overloadv()
+// LINUX: ret i32 2
+
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.sse4.2"(i32)
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"(i32)
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z"(i32)
+// WINDOWS: ret i32 2
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.sse4.2"()
+// WINDOWS: ret i32 0
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.arch_ivybridge"()
+// WINDOWS: ret i32 1
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ"()
+// WINDOWS: ret i32 2
+
+// LINUX: define i32 @_Z4bar2v()
+// LINUX: call i32 @_Z12foo_overloadv.ifunc()
+// LINUX: call i32 @_Z12foo_overloadi.ifunc(i32 1)
+
+// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"()
+// WINDOWS: call i32 @"?foo_overload@@YAHXZ.resolver"()
+// WINDOWS: call i32 @"?foo_overlo

[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-24 Thread Reid Kleckner via Phabricator via cfe-commits
rnk added inline comments.



Comment at: lib/CodeGen/CodeGenFunction.cpp:2391-2393
+  llvm::SmallVector Args;
+  llvm::for_each(Resolver->args(),
+ [&](llvm::Argument &Arg) { Args.push_back(&Arg); });

erichkeane wrote:
> rnk wrote:
> > Surely this would be simpler as a range for or `Args{Resolver->arg_begin() 
> > ...};`
> Surely... Unfortunately the types don't match.  Resolver Args are an array of 
> Arguments (so the iterator is a Arg*).
> 
> HOWEVER, "Args" here needs to be an array ref to Value*, so the iterator 
> would have to be a Value** (not a Arg*/Value*).  The point of this small 
> vector is to create an array of _pointers_ from the array of Arguments.
Right. :)



Comment at: lib/CodeGen/CodeGenFunction.cpp:2395
+
+  llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args);
+

erichkeane wrote:
> erichkeane wrote:
> > rnk wrote:
> > > This approach is... not going to work in the general case. Consider, for 
> > > example, varargs. Then consider 32-bit x86 inalloca, which is quite 
> > > widespread. Any non-trivially copyable object is not going to be passable 
> > > through such a thunk. You might consider looking at 
> > > CodeGenFunction::EmitMustTailThunk instead.
> > Oh dear... 
> > I'm unfamiliar with EmitMustTailThunk, is it self explanatory?  Any chance 
> > you can expound?  Should I call/use that function, or copy out of it?
> I looked through that function, and it seems to do very similar things to 
> this...
> 
> It FIRST does the small-vector conversion to a value* array like I do.
> 
> Second, it 'adjusts' the 'this' parameter.  This doesn't seem like it needs 
> to happen, because the type isn't changing, right?  my 'this' pointer is 
> still pass-on-able.
> 
> Third, it creates the call, then marks it TCK_MustTail.
> 
> Forth, it sets the attributes/calling convention of the 'call' object, but 
> I'd expect that to come from the llvm::Function object in the 'CreateCall', 
> right?  I can add them after the fact I guess, if we see value.*(see below)
> 
> Finally, it does the CreateRetVoid/CreateRet like this function does below. 
> 
> *I DO have a test that claims that my IR is broken if I set the call 
> attributes.  Curiously only 1 of my tests, but the error is:
> "cannot guarantee tail call due to mismatched ABI impacting function 
> attributes"
> 
> The only difference in attributes is target-features and target-cpu though, 
> so I don't know what causes this.
Right, the this-adjustment is definitely unnecessary for this kind of CPU 
dispatch thunk.

The important thing is that marking the call as `musttail` ensures that you get 
perfect forwarding of all arguments, even varargs and inalloca. Maybe it's 
enough to just mark the call you are creating that way. I think the code you 
are generating obeys all the musttail invariants, so that might be best.

The "cannot guarantee tail call due to mismatched ABI impacting function 
attributes" verifier check is only supposed to fire when parameter attributes 
like `byval` and `inreg` differ between the call site and the dispatcher 
function. It shouldn't care about target-cpu or target-features.



Comment at: test/CodeGenCXX/attr-target-mv-overloads.cpp:74
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.resolver"(i32) comdat
+// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_sandybridge"
+// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"

erichkeane wrote:
> rnk wrote:
> > Does dumpbin demangle these names correctly? Just curious.
> I don't know, I'd suspect not.  However, demangler.com (not sure what it 
> uses?) seems to demangle them ignoring everything after the '.'.
Neat.


Repository:
  rC Clang

https://reviews.llvm.org/D53586



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-24 Thread Erich Keane via Phabricator via cfe-commits
erichkeane marked an inline comment as done.
erichkeane added a comment.

@rnk: I looked into the EmitMustTailThunk code, and don't terribly see what I'm 
doing wrong.  I clearly cannot CALL that function (since this works with 
non-CXXMethodDecls), so I looked into using it.  However, I saw that it does a 
couple of small things that seem useful, but not anything that seems to fix 
your concerns.

Can you espouse a bit for me?  I'm likely missing something, but am out of 
ideas on what it could be.




Comment at: lib/CodeGen/CodeGenFunction.cpp:2395
+
+  llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args);
+

erichkeane wrote:
> rnk wrote:
> > This approach is... not going to work in the general case. Consider, for 
> > example, varargs. Then consider 32-bit x86 inalloca, which is quite 
> > widespread. Any non-trivially copyable object is not going to be passable 
> > through such a thunk. You might consider looking at 
> > CodeGenFunction::EmitMustTailThunk instead.
> Oh dear... 
> I'm unfamiliar with EmitMustTailThunk, is it self explanatory?  Any chance 
> you can expound?  Should I call/use that function, or copy out of it?
I looked through that function, and it seems to do very similar things to 
this...

It FIRST does the small-vector conversion to a value* array like I do.

Second, it 'adjusts' the 'this' parameter.  This doesn't seem like it needs to 
happen, because the type isn't changing, right?  my 'this' pointer is still 
pass-on-able.

Third, it creates the call, then marks it TCK_MustTail.

Forth, it sets the attributes/calling convention of the 'call' object, but I'd 
expect that to come from the llvm::Function object in the 'CreateCall', right?  
I can add them after the fact I guess, if we see value.*(see below)

Finally, it does the CreateRetVoid/CreateRet like this function does below. 

*I DO have a test that claims that my IR is broken if I set the call 
attributes.  Curiously only 1 of my tests, but the error is:
"cannot guarantee tail call due to mismatched ABI impacting function attributes"

The only difference in attributes is target-features and target-cpu though, so 
I don't know what causes this.


Repository:
  rC Clang

https://reviews.llvm.org/D53586



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-24 Thread Erich Keane via Phabricator via cfe-commits
erichkeane marked 2 inline comments as done.
erichkeane added a comment.

In https://reviews.llvm.org/D53586#1273546, @rnk wrote:

> Seems reasonable. Should the resolver still be called `?foo@.resolver`, or 
> should it get a new name, since it is quite functionally different now?


I'm not attached to the name.  I didn't have a good alternative (I'd thought of 
'dispatcher' at one point, but figured that was because my mind was attached to 
cpu-dispatch).  Implementation wise, '.resolver' is slightly easier, but not 
enough to motivate the discussion.

If you like '.dispatcher' instead of '.resolver', I can do that, otherwise a 
brainstorming session would be appreciated!




Comment at: lib/CodeGen/CodeGenFunction.cpp:2391-2393
+  llvm::SmallVector Args;
+  llvm::for_each(Resolver->args(),
+ [&](llvm::Argument &Arg) { Args.push_back(&Arg); });

rnk wrote:
> Surely this would be simpler as a range for or `Args{Resolver->arg_begin() 
> ...};`
Surely... Unfortunately the types don't match.  Resolver Args are an array of 
Arguments (so the iterator is a Arg*).

HOWEVER, "Args" here needs to be an array ref to Value*, so the iterator would 
have to be a Value** (not a Arg*/Value*).  The point of this small vector is to 
create an array of _pointers_ from the array of Arguments.



Comment at: lib/CodeGen/CodeGenFunction.cpp:2395
+
+  llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args);
+

rnk wrote:
> This approach is... not going to work in the general case. Consider, for 
> example, varargs. Then consider 32-bit x86 inalloca, which is quite 
> widespread. Any non-trivially copyable object is not going to be passable 
> through such a thunk. You might consider looking at 
> CodeGenFunction::EmitMustTailThunk instead.
Oh dear... 
I'm unfamiliar with EmitMustTailThunk, is it self explanatory?  Any chance you 
can expound?  Should I call/use that function, or copy out of it?



Comment at: lib/CodeGen/CodeGenFunction.cpp:2432
 llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver);
 llvm::IRBuilder<> RetBuilder(RetBlock);
+  CreateMultiVersionResolverReturn(Resolver, RetBuilder, RO.Function,

rnk wrote:
> You can make this a CGBuilderTy. Just pass it a type cache.
I'd tried that a while and could never find a type cache to use. I'd not 
realized until just now (searching more for usages) that CodeGenFunction is one!



Comment at: test/CodeGenCXX/attr-target-mv-overloads.cpp:74
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.resolver"(i32) comdat
+// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_sandybridge"
+// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"

rnk wrote:
> Does dumpbin demangle these names correctly? Just curious.
I don't know, I'd suspect not.  However, demangler.com (not sure what it uses?) 
seems to demangle them ignoring everything after the '.'.


Repository:
  rC Clang

https://reviews.llvm.org/D53586



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-23 Thread Reid Kleckner via Phabricator via cfe-commits
rnk added a comment.

Seems reasonable. Should the resolver still be called `?foo@.resolver`, or 
should it get a new name, since it is quite functionally different now?




Comment at: lib/CodeGen/CodeGenFunction.cpp:2381
+
+template
+static void CreateMultiVersionResolverReturn(llvm::Function *Resolver,

Rather than templating over IRBuilder and CGBuilderTy, I'd standardize on 
CGBuilderTy.



Comment at: lib/CodeGen/CodeGenFunction.cpp:2391-2393
+  llvm::SmallVector Args;
+  llvm::for_each(Resolver->args(),
+ [&](llvm::Argument &Arg) { Args.push_back(&Arg); });

Surely this would be simpler as a range for or `Args{Resolver->arg_begin() 
...};`



Comment at: lib/CodeGen/CodeGenFunction.cpp:2395
+
+  llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args);
+

This approach is... not going to work in the general case. Consider, for 
example, varargs. Then consider 32-bit x86 inalloca, which is quite widespread. 
Any non-trivially copyable object is not going to be passable through such a 
thunk. You might consider looking at CodeGenFunction::EmitMustTailThunk instead.



Comment at: lib/CodeGen/CodeGenFunction.cpp:2432
 llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver);
 llvm::IRBuilder<> RetBuilder(RetBlock);
+  CreateMultiVersionResolverReturn(Resolver, RetBuilder, RO.Function,

You can make this a CGBuilderTy. Just pass it a type cache.



Comment at: test/CodeGenCXX/attr-target-mv-overloads.cpp:74
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.resolver"(i32) comdat
+// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_sandybridge"
+// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"

Does dumpbin demangle these names correctly? Just curious.


Repository:
  rC Clang

https://reviews.llvm.org/D53586



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D53586: Implement Function Multiversioning for Non-ELF Systems.

2018-10-23 Thread Erich Keane via Phabricator via cfe-commits
erichkeane created this revision.
erichkeane added reviewers: echristo, rnk, aaron.ballman.
erichkeane added a subscriber: mibintc.

Similar to how ICC handles CPU-Dispatch on Windows, this patch uses the
resolver function directly to forward the call to the proper function.
This is not nearly as efficient as IFuncs of course, but is still quite
useful for large functions specifically developed for certain
processors.

This is unfortunately still limited to x86, since it depends on
__builtin_cpu_supports and __builtin_cpu_is, which are x86 builtins.

The naming for the resolver/forwarding function for cpu-dispatch was
taken from ICC's implementation, which uses the unmodified name for this
(no mangling additions).  This is possible, since cpu-dispatch uses '.A'
for the 'default' version.

In 'target' multiversioning, this function keeps the '.resolver'
extension in order to keep the default function keeping the default
mangling.


Repository:
  rC Clang

https://reviews.llvm.org/D53586

Files:
  include/clang/AST/Decl.h
  include/clang/Basic/Attr.td
  include/clang/Basic/TargetInfo.h
  lib/AST/Decl.cpp
  lib/Basic/Targets/X86.h
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenModule.cpp
  lib/CodeGen/CodeGenModule.h
  test/CodeGen/attr-cpuspecific.c
  test/CodeGen/attr-target-mv-func-ptrs.c
  test/CodeGen/attr-target-mv-va-args.c
  test/CodeGen/attr-target-mv.c
  test/CodeGenCXX/attr-target-mv-diff-ns.cpp
  test/CodeGenCXX/attr-target-mv-func-ptrs.cpp
  test/CodeGenCXX/attr-target-mv-member-funcs.cpp
  test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp
  test/CodeGenCXX/attr-target-mv-overloads.cpp
  test/Sema/attr-target-mv-bad-target.c

Index: test/Sema/attr-target-mv-bad-target.c
===
--- test/Sema/attr-target-mv-bad-target.c
+++ test/Sema/attr-target-mv-bad-target.c
@@ -1,4 +1,3 @@
-// RUN: %clang_cc1 -triple x86_64-windows-pc  -fsyntax-only -verify %s
 // RUN: %clang_cc1 -triple arm-none-eabi  -fsyntax-only -verify %s
 
 int __attribute__((target("sse4.2"))) redecl1(void) { return 1; }
Index: test/CodeGenCXX/attr-target-mv-overloads.cpp
===
--- test/CodeGenCXX/attr-target-mv-overloads.cpp
+++ test/CodeGenCXX/attr-target-mv-overloads.cpp
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX
+// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS
 
 int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; }
 int __attribute__((target("arch=sandybridge"))) foo_overload(int);
@@ -13,38 +14,70 @@
   return foo_overload() + foo_overload(1);
 }
 
-// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
-// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver
-
-
-// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32)
-// CHECK: ret i32 0
-// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
-// CHECK: ret i32 1
-// CHECK: define i32 @_Z12foo_overloadi(i32)
-// CHECK: ret i32 2
-// CHECK: define i32 @_Z12foo_overloadv.sse4.2()
-// CHECK: ret i32 0
-// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge()
-// CHECK: ret i32 1
-// CHECK: define i32 @_Z12foo_overloadv()
-// CHECK: ret i32 2
-
-// CHECK: define i32 @_Z4bar2v()
-// CHECK: call i32 @_Z12foo_overloadv.ifunc()
-// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1)
-
-// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat
-// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge
-// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge
-// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2
-// CHECK: ret i32 ()* @_Z12foo_overloadv
-
-// CHECK: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2
-// CHECK: ret i32 (i32)* @_Z12foo_overloadi
-
-// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge()
-// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32)
+// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver
+// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver
+
+
+// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32)
+// LINUX: ret i32 0
+// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32)
+// LINUX: ret i32 1
+// LINUX: define i32 @_Z12foo_overloadi(i32)
+// LINUX: ret i32 2
+// LINUX: define i32 @_Z12foo_overloadv.sse4.2()
+// LINUX: ret i32 0
+// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge()
+// LINUX: ret i32 1
+// LINUX: define i32 @_Z12foo_overloadv()
+// LINUX: ret i32 2
+
+// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.s