[clang] [llvm] [IPO] Optimise variadic functions (PR #92850)

2024-05-24 Thread Jon Chesterfield via cfe-commits

JonChesterfield wrote:

Dropping this in favour of 
[93362](https://github.com/llvm/llvm-project/pull/93362) on risk assessment 
grounds.

This commit enabled ad hoc testing from wasm, x64, and aarch64. However if it's 
buggy, it'll show up on those targets, which should make the code owners 
reluctant to land it.

Going with pure amdgpu first is not great from the perspective of finding 
errors in this pass - since va_arg is currently a fatal_error, existing 
software on amdgpu doesn't tend to use variadic functions - but it does 
completely derisk the initial commit.

https://github.com/llvm/llvm-project/pull/92850
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [IPO] Optimise variadic functions (PR #92850)

2024-05-24 Thread Jon Chesterfield via cfe-commits

https://github.com/JonChesterfield closed 
https://github.com/llvm/llvm-project/pull/92850
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [IPO] Optimise variadic functions (PR #92850)

2024-05-20 Thread Jon Chesterfield via cfe-commits

JonChesterfield wrote:

[inline-then-fold-variadics.cpp](https://github.com/llvm/llvm-project/pull/92850/commits/15061bfbc2dc06de5bac32628389386cadaa5632#diff-0a9893e04ae7e0a5692ad93dfb73d6efa992953f7e9eebb68c1a3f4acd457e1e)
 is the motivating example for optimisation - simple variadic functions are 
removed entirely. Variadic logging wrappers around vfprintf and similar are 
similarly inlined.

Wasm might choose to use a call to this to replace the backend handling, they 
should each build the same frame passed by pointer. That would help offset the 
increase in compiler complexity from this patch. AMDGPU has a requirement for 
variadic function lowering, at least sufficient for libc. It is much easier to 
debug in wasm or x64 than on the GPU so I'm hoping this can land as a target 
independent pass and not end up in a GPU backend.

https://github.com/llvm/llvm-project/pull/92850
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [IPO] Optimise variadic functions (PR #92850)

2024-05-20 Thread via cfe-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


``bash
git-clang-format --diff 560c2fd3d427a5e2dc2361abde1142f3fda40253 
15061bfbc2dc06de5bac32628389386cadaa5632 -- clang/test/CodeGen/voidptr-vaarg.c 
clang/test/CodeGenCXX/inline-then-fold-variadics.cpp 
llvm/include/llvm/Transforms/IPO/ExpandVariadics.h 
llvm/lib/Transforms/IPO/ExpandVariadics.cpp 
clang/test/CodeGen/aarch64-ABI-align-packed.c 
llvm/include/llvm/InitializePasses.h llvm/lib/Passes/PassBuilder.cpp 
llvm/lib/Passes/PassBuilderPipelines.cpp
``





View the diff from clang-format here.


``diff
diff --git a/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h 
b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
index 02f3aa45ef..f7c9618bad 100644
--- a/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
+++ b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h
@@ -18,9 +18,9 @@ class OptimizationLevel;
 
 enum class ExpandVariadicsMode {
   Unspecified, // Use the implementation defaults
-  Disable, // Disable the pass entirely
-  Optimize, // Optimise without changing ABI
-  Lowering, // Change variadic calling convention
+  Disable, // Disable the pass entirely
+  Optimize,// Optimise without changing ABI
+  Lowering,// Change variadic calling convention
 };
 
 class ExpandVariadicsPass : public PassInfoMixin {
diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp 
b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
index c04e16cb7b..e27c391d88 100644
--- a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
+++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp
@@ -106,8 +106,7 @@ public:
   virtual Value *initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder,
   AllocaInst *, Value * /*buffer*/) = 0;
 
-  struct VAArgSlotInfo
-  {
+  struct VAArgSlotInfo {
 Align Align;   // With respect to the call frame
 bool Indirect; // Passed via a pointer
 bool Unknown;  // Cannot analyse this type, cannot transform the call
@@ -178,7 +177,6 @@ public:
   bool runOnModule(Module &M) override;
   bool runOnFunction(Module &M, IRBuilder<> &Builder, Function *F);
 
-
   bool rewriteABI() { return Mode == ExpandVariadicsMode::Lowering; }
 
   void memcpyVAListPointers(const DataLayout &DL, IRBuilder<> &Builder,
@@ -190,7 +188,6 @@ public:
ConstantInt::get(Type::getInt32Ty(Ctx), Size));
   }
 
-
   template 
   bool expandIntrinsicUsers(Module &M, IRBuilder<> &Builder,
 PointerType *ArgType) {
@@ -217,7 +214,6 @@ public:
   bool expandVAIntrinsicCall(IRBuilder<> &Builder, const DataLayout &DL,
  VACopyInst *Inst);
 
-  
   FunctionType *inlinableVariadicFunctionType(Module &M, FunctionType *FTy) {
 // The type of "FTy" with the ... removed and a va_list appended
 SmallVector ArgTypes(FTy->param_begin(), FTy->param_end());
@@ -492,7 +488,8 @@ bool ExpandVariadics::runOnFunction(Module &M, IRBuilder<> 
&Builder,
   assert(VariadicWrapper->isDeclaration());
   assert(OriginalFunction->use_empty());
 
-  // Create a new function taking va_list containing the implementation of the 
original
+  // Create a new function taking va_list containing the implementation of the
+  // original
   Function *FixedArityReplacement =
   deriveFixedArityReplacement(M, Builder, OriginalFunction);
   assert(OriginalFunction->isDeclaration());
@@ -511,7 +508,6 @@ bool ExpandVariadics::runOnFunction(Module &M, IRBuilder<> 
&Builder,
   // 2. a variadic function that unconditionally calls a fixed arity 
replacement
   // 3. a fixed arity function equivalent to the original function
 
-  
   // Replace known calls to the variadic with calls to the va_list equivalent
   for (User *U : llvm::make_early_inc_range(VariadicWrapper->users())) {
 if (CallBase *CB = dyn_cast(U)) {
@@ -524,13 +520,11 @@ bool ExpandVariadics::runOnFunction(Module &M, 
IRBuilder<> &Builder,
 }
   }
 
-
   Function *const ExternallyAccessible =
-  rewriteABI() ? FixedArityReplacement : VariadicWrapper;  
+  rewriteABI() ? FixedArityReplacement : VariadicWrapper;
   Function *const InternalOnly =
-rewriteABI() ? VariadicWrapper : FixedArityReplacement;
+  rewriteABI() ? VariadicWrapper : FixedArityReplacement;
 
-  
   // care needed over other attributes, metadata etc
 
   ExternallyAccessible->setLinkage(OriginalFunction->getLinkage());
@@ -548,10 +542,10 @@ bool ExpandVariadics::runOnFunction(Module &M, 
IRBuilder<> &Builder,
   if (rewriteABI()) {
 // All known calls to the function have been removed by expandCall
 // Resolve everything else by replace all uses with
-
+
 VariadicWrapper->replaceAllUsesWith(FixedArityReplacement);
-
-assert (VariadicWrapper->use_empty());
+
+assert(VariadicWrapper->use_empty());
 VariadicWrapper->eraseFromParent();
   }
 
@@ -613,10 +

[clang] [llvm] [IPO] Optimise variadic functions (PR #92850)

2024-05-20 Thread Jon Chesterfield via cfe-commits


@@ -1,5 +1,6 @@
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -emit-llvm -O2 -o - 
%s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -emit-llvm -O2 -o - 
%s -mllvm -expand-variadics-override=disable | FileCheck %s
+

JonChesterfield wrote:

^this disable shouldn't be necessary, having trouble working out what the 
filecheck regex are complaining about

https://github.com/llvm/llvm-project/pull/92850
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [IPO] Optimise variadic functions (PR #92850)

2024-05-20 Thread via cfe-commits

llvmbot wrote:



@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-x86

Author: Jon Chesterfield (JonChesterfield)


Changes

Replace variadic functions with equivalent functions taking a va_list. This 
composes with optimisations like inlining to give zero cost variadics. 
Scheduled before the inliner at O1 and callable from a backend to provide a 
lowering implementation on IR.

Implementation is complete for webassembly. The frame constructed is the same 
as that done by the backend, a later patch will propose replacing the bespoke 
lowering there with a call to this pass. Most of the target-independent tests 
for this pass use wasm as a convenient triple.

Implemented for simple types on x64 and aarch64 to show that more complicated 
va_list constructs work as expected. Complex types left to a later patch to 
help keep the test quantity reasonable here. Likewise exceptions are left for a 
later patch.

Implementing nvptx and amdgpu in later patches as they're straightforward given 
this pass but may need discussion over what the proper calling convention 
should be. Both are expected to look similar to wasm. That would be consistent 
with ptx.

Implementing for targets that have va_arg selected in place for is mechanical - 
construct whatever frame layout is the complement of va_arg, set up a va_list 
iterator, add the tests.

The hope is that this is sufficiently useful to land as is. It'll remove any 
known calls for webassembly and the majority of known calls likely to exist in 
practice on x64 and aarch64. Having the pass in tree would be very helpful for 
reviews of amdgpu and nvptx variadic lowering, letting those focus on the what 
as opposed to the how.

---

Patch is 198.86 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/92850.diff


28 Files Affected:

- (modified) clang/test/CodeGen/aarch64-ABI-align-packed.c (+2-1) 
- (added) clang/test/CodeGen/voidptr-vaarg.c (+478) 
- (added) clang/test/CodeGenCXX/inline-then-fold-variadics.cpp (+128) 
- (modified) llvm/include/llvm/InitializePasses.h (+1) 
- (added) llvm/include/llvm/Transforms/IPO/ExpandVariadics.h (+43) 
- (modified) llvm/lib/Passes/PassBuilder.cpp (+1) 
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+4) 
- (modified) llvm/lib/Passes/PassRegistry.def (+1) 
- (modified) llvm/lib/Transforms/IPO/CMakeLists.txt (+1) 
- (added) llvm/lib/Transforms/IPO/ExpandVariadics.cpp (+1250) 
- (added) llvm/test/CodeGen/AArch64/expand-variadic-call-apcs64-linux.ll (+289) 
- (added) llvm/test/CodeGen/WebAssembly/expand-variadic-call.ll (+483) 
- (added) llvm/test/CodeGen/WebAssembly/vararg-frame.ll (+525) 
- (added) llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll (+244) 
- (modified) llvm/test/Other/new-pm-defaults.ll (+1) 
- (modified) llvm/test/Other/new-pm-thinlto-postlink-defaults.ll (+1) 
- (modified) llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll (+1) 
- (modified) llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll 
(+1-1) 
- (modified) llvm/test/Other/new-pm-thinlto-prelink-defaults.ll (+1) 
- (modified) llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll (+1) 
- (modified) llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll (+1) 
- (added) 
llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-linkage.ll 
(+239) 
- (added) 
llvm/test/Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll (+212) 
- (added) llvm/test/Transforms/ExpandVariadics/indirect-calls.ll (+58) 
- (added) llvm/test/Transforms/ExpandVariadics/intrinsics.ll (+117) 
- (added) llvm/test/Transforms/ExpandVariadics/pass-byval.ll (+81) 
- (added) llvm/test/Transforms/ExpandVariadics/pass-integers.ll (+344) 
- (modified) llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn (+1) 


``diff
diff --git a/clang/test/CodeGen/aarch64-ABI-align-packed.c 
b/clang/test/CodeGen/aarch64-ABI-align-packed.c
index 0349ebc8cc639..9ce051369f390 100644
--- a/clang/test/CodeGen/aarch64-ABI-align-packed.c
+++ b/clang/test/CodeGen/aarch64-ABI-align-packed.c
@@ -1,5 +1,6 @@
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -emit-llvm -O2 -o - 
%s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -emit-llvm -O2 -o - 
%s -mllvm -expand-variadics-override=disable | FileCheck %s
+
 #include 
 #include 
 
diff --git a/clang/test/CodeGen/voidptr-vaarg.c 
b/clang/test/CodeGen/voidptr-vaarg.c
new file mode 100644
index 0..d023ddf0fb5d2
--- /dev/null
+++ b/clang/test/CodeGen/voidptr-vaarg.c
@@ -0,0 +1,478 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: webassembly-registered-target
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -emit-llvm -o - %s | 
FileCheck %s
+
+// Multiple targets use emitVoidPtrVAArg to lower va_arg instructions in clang
+// PPC is complicated, excluding from this case analysis
+// ForceRightAdjust is false for all non-PPC