[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
This revision was automatically updated to reflect the committed changes. Closed by commit rG36c76de6789c: [AArch64][SVE] Add a pass for SVE intrinsic optimisations (authored by kmclaughlin). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 Files: llvm/lib/Target/AArch64/AArch64.h llvm/lib/Target/AArch64/AArch64TargetMachine.cpp llvm/lib/Target/AArch64/CMakeLists.txt llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp llvm/test/CodeGen/AArch64/O3-pipeline.ll llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll @@ -0,0 +1,203 @@ +; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s + +define @reinterpret_test_h( %a) { +; OPT-LABEL: @reinterpret_test_h( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_h_rev( %a) { +; OPT-LABEL: @reinterpret_test_h_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) + ret %2 +} + +define @reinterpret_test_w( %a) { +; OPT-LABEL: @reinterpret_test_w( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_w_rev( %a) { +; OPT-LABEL: @reinterpret_test_w_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) + ret %2 +} + +define @reinterpret_test_d( %a) { +; OPT-LABEL: @reinterpret_test_d( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_d_rev( %a) { +; OPT-LABEL: @reinterpret_test_d_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) + ret %2 +} + +define @reinterpret_reductions(i32 %cond, %a, %b, %c) { +; OPT-LABEL: reinterpret_reductions +; OPT-NOT: convert +; OPT-NOT: phi +; OPT: phi [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ] +; OPT-NOT: convert +; OPT: ret + +entry: + switch i32 %cond, label %br_phi_c [ + i32 43, label %br_phi_a + i32 45, label %br_phi_b + ] + +br_phi_a: + %a1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + br label %join + +br_phi_b: + %b1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %b) + br label %join + +br_phi_c: + %c1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c) + br label %join + +join: + %pg = phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] + %pg1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + ret %pg1 +} + +; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1) +; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets. +define @reinterpret_reductions_1(i32 %cond, %a, %b, %c) { +; OPT-LABEL: reinterpret_reductions_1 +; OPT: convert +; OPT: phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] +; OPT-NOT: phi +; OPT: tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) +; OPT: ret + +entry: + switch i32 %cond, label %br_phi_c [ + i32 43, label %br_phi_a + i32 45, label %br_phi_b + ] + +br_phi_a: + %a1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + br label %join + +br_phi_b: + %b1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %b) + br label %join + +br_phi
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
efriedma accepted this revision. efriedma added a comment. This revision is now accepted and ready to land. LGTM CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
kmclaughlin added a comment. Ping :) CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
kmclaughlin updated this revision to Diff 252575. kmclaughlin marked an inline comment as done. kmclaughlin added a comment. Use SmallSetVector for the list of functions gathered by runOnModule to preserve the order of iteration CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 Files: llvm/lib/Target/AArch64/AArch64.h llvm/lib/Target/AArch64/AArch64TargetMachine.cpp llvm/lib/Target/AArch64/CMakeLists.txt llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp llvm/test/CodeGen/AArch64/O3-pipeline.ll llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll @@ -0,0 +1,203 @@ +; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s + +define @reinterpret_test_h( %a) { +; OPT-LABEL: @reinterpret_test_h( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_h_rev( %a) { +; OPT-LABEL: @reinterpret_test_h_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) + ret %2 +} + +define @reinterpret_test_w( %a) { +; OPT-LABEL: @reinterpret_test_w( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_w_rev( %a) { +; OPT-LABEL: @reinterpret_test_w_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) + ret %2 +} + +define @reinterpret_test_d( %a) { +; OPT-LABEL: @reinterpret_test_d( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_d_rev( %a) { +; OPT-LABEL: @reinterpret_test_d_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) + ret %2 +} + +define @reinterpret_reductions(i32 %cond, %a, %b, %c) { +; OPT-LABEL: reinterpret_reductions +; OPT-NOT: convert +; OPT-NOT: phi +; OPT: phi [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ] +; OPT-NOT: convert +; OPT: ret + +entry: + switch i32 %cond, label %br_phi_c [ + i32 43, label %br_phi_a + i32 45, label %br_phi_b + ] + +br_phi_a: + %a1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + br label %join + +br_phi_b: + %b1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %b) + br label %join + +br_phi_c: + %c1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c) + br label %join + +join: + %pg = phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] + %pg1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + ret %pg1 +} + +; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1) +; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets. +define @reinterpret_reductions_1(i32 %cond, %a, %b, %c) { +; OPT-LABEL: reinterpret_reductions_1 +; OPT: convert +; OPT: phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] +; OPT-NOT: phi +; OPT: tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) +; OPT: ret + +entry: + switch i32 %cond, label %br_phi_c [ + i32 43, label %br_phi_a + i32 45, label %br_phi_b + ] + +br_phi_a: + %a1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + br label %join + +br_phi_b: + %b1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %b) + br label %join + +br_phi
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
efriedma added inline comments. Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:233 + bool Changed = false; + for (auto *F : Functions) { +DominatorTree *DT = &getAnalysis(*F).getDomTree(); Iterating over a SmallPtrSet is non-deterministic. In this context, probably SetVector is the right data structure. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
kmclaughlin updated this revision to Diff 252368. kmclaughlin marked 3 inline comments as done. kmclaughlin added a comment. Use SmallPtrSet instead of SmallVector for storing functions found by runOnModule Add more comments to clarify the purpose of the pass and some of the negative reinterpret tests CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 Files: llvm/lib/Target/AArch64/AArch64.h llvm/lib/Target/AArch64/AArch64TargetMachine.cpp llvm/lib/Target/AArch64/CMakeLists.txt llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp llvm/test/CodeGen/AArch64/O3-pipeline.ll llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll @@ -0,0 +1,203 @@ +; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s + +define @reinterpret_test_h( %a) { +; OPT-LABEL: @reinterpret_test_h( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_h_rev( %a) { +; OPT-LABEL: @reinterpret_test_h_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) + ret %2 +} + +define @reinterpret_test_w( %a) { +; OPT-LABEL: @reinterpret_test_w( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_w_rev( %a) { +; OPT-LABEL: @reinterpret_test_w_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) + ret %2 +} + +define @reinterpret_test_d( %a) { +; OPT-LABEL: @reinterpret_test_d( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_d_rev( %a) { +; OPT-LABEL: @reinterpret_test_d_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) + ret %2 +} + +define @reinterpret_reductions(i32 %cond, %a, %b, %c) { +; OPT-LABEL: reinterpret_reductions +; OPT-NOT: convert +; OPT-NOT: phi +; OPT: phi [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ] +; OPT-NOT: convert +; OPT: ret + +entry: + switch i32 %cond, label %br_phi_c [ + i32 43, label %br_phi_a + i32 45, label %br_phi_b + ] + +br_phi_a: + %a1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + br label %join + +br_phi_b: + %b1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %b) + br label %join + +br_phi_c: + %c1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c) + br label %join + +join: + %pg = phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] + %pg1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + ret %pg1 +} + +; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1) +; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets. +define @reinterpret_reductions_1(i32 %cond, %a, %b, %c) { +; OPT-LABEL: reinterpret_reductions_1 +; OPT: convert +; OPT: phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] +; OPT-NOT: phi +; OPT: tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) +; OPT: ret + +entry: + switch i32 %cond, label %br_phi_c [ + i32 43, label %br_phi_a + i32 45, label %br_phi_b + ] + +br_phi_a: + %a1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + br label %join + +br_phi_b: + %b1 = tail call
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
andwar added a comment. Thanks for the updates @kmclaughlin ! Would you mind adding a comment to clearly mark the negative tests? E.g. for `@reinterpret_reductions_1`? Maybe also a comment `why` a particular case is not optimised? You've already done that for some tests, but not all of them. Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:10 +// +// Performs general IR level optimizations on SVE intrinsics. +// I don't see any documentation for the pass that's being added here (apart from the commit msg). Perhaps it's worth expanding this comment? Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:221 +bool SVEIntrinsicOpts::optimizeFunctions( +SmallVectorImpl &Functions) { + bool Changed = false; I think that you could simplify things a bit by using `SmallPtrSet` instead: http://llvm.org/docs/ProgrammersManual.html#dss-smallptrset. With a set you can avoid explicit checks like this: ``` std::find(Functions.begin(), Functions.end(), Inst->getFunction()) == Functions.end() ``` With `SmallPtrSet` you can write less code :) Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:241 + + for (auto &F : M.getFunctionList()) { +if (!F.isDeclaration()) Could you decorate this `for` loop with a comment explaining `what` kind of data is generated here and `why`? Ta! CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
kmclaughlin added a comment. Thanks for reviewing this, @efriedma & @andwar! Comment at: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp:441 + // Expand any SVE vector library calls that we can't code generate directly. + bool ExpandToOptimize = (TM->getOptLevel() != CodeGenOpt::None); + if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive) efriedma wrote: > unused bool? Removed Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:120 +if (!Reinterpret || +RequiredType != Reinterpret->getArgOperand(0)->getType()) + return false; andwar wrote: > Isn't it guaranteed that `RequiredType == > Reinterpret->getArgOperand(0)->getType()` is always true? I.e., `PN` and the > incoming values have identical type. The incoming values to `PN` will all have the same type, but this is making sure that the reinterprets are all converting from the same type (there is a test for this in sve-intrinsic-opts-reinterpret.ll called `reinterpret_reductions_1`, where the arguments to convert.to.svbool are a mix of nxv2i1 and nxv4i1) Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:224 + bool Changed = false; + for (auto II = BB->begin(), IE = BB->end(); II != IE;) { +Instruction *I = &(*II); andwar wrote: > 1. Could this be a for-range loop instead? > > 2. This loop seems to be a perfect candidate for `make_early_inc_range` > (https://github.com/llvm/llvm-project/blob/172f1460ae05ab5c33c757142c8bdb10acfbdbe1/llvm/include/llvm/ADT/STLExtras.h#L499), > e.g. > ``` > for (Instruction &I : make_early_inc_range(BB)) > Changed |= optimizeIntrinsic(&I); > ``` Changed this to use `make_early_inc_range` as suggested Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:234 + DT = &getAnalysis().getDomTree(); + bool Changed = false; + efriedma wrote: > You might want to check whether the module actually declares any of the SVE > intrinsics before you iterate over the whole function. Thanks for the suggestion - I changed this to a module pass so that we can check if any of the SVE intrinsics we are interested in are declared first. Comment at: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll:18 +; OPT-LABEL: ptest_any2 +; OPT: %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %2) + %mask = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) andwar wrote: > What's `%1` and `%2`? Is it worth adding the calls that generated them in the > expected output? I think that would make sense. I've added `%1` and `%2` to the expected output and added more checks to the other tests here. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
kmclaughlin updated this revision to Diff 251643. kmclaughlin marked 13 inline comments as done. kmclaughlin added a comment. - Changed this from a function pass to a module pass & now check if any of the relevant SVE intrinsics are declared first before iterating over functions - Added more checks on expected output in the tests CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 Files: llvm/lib/Target/AArch64/AArch64.h llvm/lib/Target/AArch64/AArch64TargetMachine.cpp llvm/lib/Target/AArch64/CMakeLists.txt llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp llvm/test/CodeGen/AArch64/O3-pipeline.ll llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll @@ -0,0 +1,199 @@ +; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s + +define @reinterpret_test_h( %a) { +; OPT-LABEL: @reinterpret_test_h( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_h_rev( %a) { +; OPT-LABEL: @reinterpret_test_h_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) + ret %2 +} + +define @reinterpret_test_w( %a) { +; OPT-LABEL: @reinterpret_test_w( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_w_rev( %a) { +; OPT-LABEL: @reinterpret_test_w_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) + ret %2 +} + +define @reinterpret_test_d( %a) { +; OPT-LABEL: @reinterpret_test_d( +; OPT-NOT: convert +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_d_rev( %a) { +; OPT-LABEL: @reinterpret_test_d_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) + ret %2 +} + +define @reinterpret_reductions(i32 %cond, %a, %b, %c) { +; OPT-LABEL: reinterpret_reductions +; OPT-NOT: convert +; OPT-NOT: phi +; OPT: phi [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ] +; OPT-NOT: convert +; OPT: ret + +entry: + switch i32 %cond, label %br_phi_c [ + i32 43, label %br_phi_a + i32 45, label %br_phi_b + ] + +br_phi_a: + %a1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + br label %join + +br_phi_b: + %b1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %b) + br label %join + +br_phi_c: + %c1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c) + br label %join + +join: + %pg = phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] + %pg1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + ret %pg1 +} + +define @reinterpret_reductions_1(i32 %cond, %a, %b, %c) { +; OPT-LABEL: reinterpret_reductions_1 +; OPT: convert +; OPT: phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] +; OPT-NOT: phi +; OPT: tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) +; OPT: ret + +entry: + switch i32 %cond, label %br_phi_c [ + i32 43, label %br_phi_a + i32 45, label %br_phi_b + ] + +br_phi_a: + %a1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + br label %join + +br_phi_b: + %b1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %b) + br label %join + +br_phi_c: + %c1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c) + br label %j
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
andwar added a comment. > Have you considered adding an interface for the new PM? Please ignore that comment ^^^. I incorrectly assumed that the backend has also been ported to the new PM, but that's not the case. In particular, `llc` will only use the legacy PM anyway: https://github.com/llvm/llvm-project/blob/55b92dcb35a0758bea294ab6e42f9954ac06cac3/llvm/tools/llc/llc.cpp#L510 Apologies for the confusion! Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
andwar added a comment. Cheers for working on this @kmclaughlin ! Have you considered adding an interface for the new PM? You could check this for reference: https://reviews.llvm.org/rGd6de5f12d485a85504bc99d384a85634574a27e2 (also implements a FunctionPass). Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:30 + +#define DEBUG_TYPE "sve-intrinsicopts" + In AArch64TargetMachine.cpp it's: ``` static cl::opt EnableSVEIntrinsicOpts( "aarch64-sve-intrinsic-opts", cl::Hidden, cl::desc("Enable SVE intrinsic opts"), cl::init(true)); ``` so it probably make sense to use `see-intrinsic-opts` here as well? Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:110 + auto *PN = dyn_cast(X->getOperand(0)); + if (!PN) +return false; Given where this method is called, this is guaranteed to be always non-null. Perhaps `assert` instead? Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:120 +if (!Reinterpret || +RequiredType != Reinterpret->getArgOperand(0)->getType()) + return false; Isn't it guaranteed that `RequiredType == Reinterpret->getArgOperand(0)->getType()` is always true? I.e., `PN` and the incoming values have identical type. Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:125 + // Create the new Phi + LLVMContext &C1 = PN->getContext(); + IRBuilder<> Builder(C1); [nit] Perhaps `Ctx` instead of `C1`? Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:131 + + for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { +auto *Reinterpret = cast(PN->getIncomingValue(i)); `i` -> `I` Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:187 + // elide away both reinterprets if there are no other users of Y. + if (auto *Y = isReinterpretToSVBool(I->getArgOperand(0))) { +Value *SourceVal = Y->getArgOperand(0); I'd be tempted to rewrite this to use early exit (https://llvm.org/docs/CodingStandards.html#use-early-exits-and-continue-to-simplify-code): ``` auto *Y = isReinterpretToSVBool(I->getArgOperand(0)); if (nullptr == Y) return false; // The rest of the function ``` Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:224 + bool Changed = false; + for (auto II = BB->begin(), IE = BB->end(); II != IE;) { +Instruction *I = &(*II); 1. Could this be a for-range loop instead? 2. This loop seems to be a perfect candidate for `make_early_inc_range` (https://github.com/llvm/llvm-project/blob/172f1460ae05ab5c33c757142c8bdb10acfbdbe1/llvm/include/llvm/ADT/STLExtras.h#L499), e.g. ``` for (Instruction &I : make_early_inc_range(BB)) Changed |= optimizeIntrinsic(&I); ``` Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:240 + ReversePostOrderTraversal RPOT(Root); + for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { +Changed |= optimizeBlock(*I); AFAIK, this iterates over BBs so `I` should be replaced with `BB`. Also, perhaps `for (auto *BB : RPOT) {` instead? Comment at: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll:18 +; OPT-LABEL: ptest_any2 +; OPT: %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1( %1, %2) + %mask = tail call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) What's `%1` and `%2`? Is it worth adding the calls that generated them in the expected output? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
efriedma added inline comments. Comment at: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp:441 + // Expand any SVE vector library calls that we can't code generate directly. + bool ExpandToOptimize = (TM->getOptLevel() != CodeGenOpt::None); + if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive) unused bool? Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:56 + + static bool processPhiNode(Instruction *I); + `processPhiNode(IntrinsicInst *I)`? Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:109 + + auto *PN = dyn_cast(X->getOperand(0)); + if (!PN) Please use getArgOperand() to get the arguments of calls. Comment at: llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp:234 + DT = &getAnalysis().getDomTree(); + bool Changed = false; + You might want to check whether the module actually declares any of the SVE intrinsics before you iterate over the whole function. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D76078/new/ https://reviews.llvm.org/D76078 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D76078: [AArch64][SVE] Add a pass for SVE intrinsic optimisations
kmclaughlin created this revision. kmclaughlin added reviewers: sdesmalen, andwar, efriedma, cameron.mcinally, c-rhodes. Herald added subscribers: danielkiss, psnobl, rkruppe, hiraditya, kristof.beyls, tschuett, mgorny. Herald added a reviewer: rengolin. Herald added a project: LLVM. Creates the SVEIntrinsicOpts pass. In this patch, the pass tries to remove unnecessary reinterpret intrinsics which convert to and from svbool_t (llvm.aarch64.sve.convert.[to|from].svbool) For example, the reinterprets below are redundant: %1 = call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a) %2 = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) The pass also looks for ptest intrinsics and phi instructions where the operands are being needlessly converted to and from svbool_t. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D76078 Files: llvm/lib/Target/AArch64/AArch64.h llvm/lib/Target/AArch64/AArch64TargetMachine.cpp llvm/lib/Target/AArch64/CMakeLists.txt llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp llvm/test/CodeGen/AArch64/O3-pipeline.ll llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll Index: llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll === --- /dev/null +++ llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll @@ -0,0 +1,196 @@ +; RUN: opt -S -sve-intrinsicopts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s + +define @reinterpret_test_h( %a) { +; OPT-LABEL: @reinterpret_test_h( +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_h_rev( %a) { +; OPT-LABEL: @reinterpret_test_h_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) + ret %2 +} + +define @reinterpret_test_w( %a) { +; OPT-LABEL: @reinterpret_test_w( +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_w_rev( %a) { +; OPT-LABEL: @reinterpret_test_w_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) + ret %2 +} + +define @reinterpret_test_d( %a) { +; OPT-LABEL: @reinterpret_test_d( +; OPT: ret %a + %1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %1) + ret %2 +} + +; Reinterprets are not redundant because the second reinterpret zeros the +; lanes that don't exist within its input. +define @reinterpret_test_d_rev( %a) { +; OPT-LABEL: @reinterpret_test_d_rev( +; OPT: %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) +; OPT-NEXT: %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) +; OPT-NEXT: ret %2 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %a) + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) + ret %2 +} + +define @reinterpret_reductions(i32 %cond, %a, %b, %c) { +; OPT-LABEL: reinterpret_reductions +; OPT-NOT: convert +; OPT-NOT: phi +; OPT: phi [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ] +; OPT-NOT: convert +; OPT: ret + +entry: + switch i32 %cond, label %br_phi_c [ + i32 43, label %br_phi_a + i32 45, label %br_phi_b + ] + +br_phi_a: + %a1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %a) + br label %join + +br_phi_b: + %b1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %b) + br label %join + +br_phi_c: + %c1 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %c) + br label %join + +join: + %pg = phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] + %pg1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) + ret %pg1 +} + +define @reinterpret_reductions_1(i32 %cond, %a, %b, %c) { +; OPT-LABEL: reinterpret_reductions_1 +; OPT: convert +; OPT: phi [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ] +; OPT-NOT: phi +; OPT: tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( %pg) +; OPT: ret + +entry: + switch i32 %cond, label %br_phi_c