https://github.com/Bryce-MW updated
https://github.com/llvm/llvm-project/pull/77964
>From d4c312b9dbf447d0a53dda0e6cdc482bd908430b Mon Sep 17 00:00:00 2001
From: Bryce Wilson
Date: Fri, 12 Jan 2024 16:01:32 -0600
Subject: [PATCH 01/15] [X86] Use RORX over SHR imm
---
llvm/lib/Target/X86/X86InstrShiftRotate.td | 78 ++
llvm/test/CodeGen/X86/atomic-unordered.ll | 3 +-
llvm/test/CodeGen/X86/bmi2.ll | 6 +-
llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll | 3 +-
llvm/test/CodeGen/X86/pr35636.ll | 4 +-
llvm/test/CodeGen/X86/vector-trunc-ssat.ll | 116 ++---
6 files changed, 143 insertions(+), 67 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td
b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index f951894db1890cd..238e8e9b6e97f30 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -879,6 +879,26 @@ let Predicates = [HasBMI2, HasEGPR, In64BitMode] in {
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8, PD, REX_W,
EVEX;
}
+
+def immle16_8 : ImmLeaf;
+def immle32_8 : ImmLeaf;
+def immle64_8 : ImmLeaf;
+def immle32_16 : ImmLeaf;
+def immle64_16 : ImmLeaf;
+def immle64_32 : ImmLeaf;
+
let Predicates = [HasBMI2] in {
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
let AddedComplexity = 10 in {
@@ -891,6 +911,64 @@ let Predicates = [HasBMI2] in {
(RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
(RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+
+// A right shift by less than a smaller register size that is then
+// truncated to that register size can be replaced by RORX to
+// preserve flags with the same execution cost
+
+def : Pat<(i8 (trunc (srl GR16:$src, (i8 immle16_8:$shamt,
+ (EXTRACT_SUBREG (RORX32ri (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR16:$src, sub_16bit), imm:$shamt), sub_8bit)>;
+def : Pat<(i8 (trunc (sra GR16:$src, (i8 immle16_8:$shamt,
+ (EXTRACT_SUBREG (RORX32ri (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR16:$src, sub_16bit), imm:$shamt), sub_8bit)>;
+def : Pat<(i8 (trunc (srl GR32:$src, (i8 immle32_8:$shamt,
+ (EXTRACT_SUBREG (RORX32ri GR32:$src, imm:$shamt), sub_8bit)>;
+def : Pat<(i8 (trunc (sra GR32:$src, (i8 immle32_8:$shamt,
+ (EXTRACT_SUBREG (RORX32ri GR32:$src, imm:$shamt), sub_8bit)>;
+def : Pat<(i8 (trunc (srl GR64:$src, (i8 immle64_8:$shamt,
+ (EXTRACT_SUBREG (RORX64ri GR64:$src, imm:$shamt), sub_8bit)>;
+def : Pat<(i8 (trunc (sra GR64:$src, (i8 immle64_8:$shamt,
+ (EXTRACT_SUBREG (RORX64ri GR64:$src, imm:$shamt), sub_8bit)>;
+
+
+def : Pat<(i16 (trunc (srl GR32:$src, (i8 immle32_16:$shamt,
+ (EXTRACT_SUBREG (RORX32ri GR32:$src, imm:$shamt), sub_16bit)>;
+def : Pat<(i16 (trunc (sra GR32:$src, (i8 immle32_16:$shamt,
+ (EXTRACT_SUBREG (RORX32ri GR32:$src, imm:$shamt), sub_16bit)>;
+def : Pat<(i16 (trunc (srl GR64:$src, (i8 immle64_16:$shamt,
+ (EXTRACT_SUBREG (RORX64ri GR64:$src, imm:$shamt), sub_16bit)>;
+def : Pat<(i16 (trunc (sra GR64:$src, (i8 immle64_16:$shamt,
+ (EXTRACT_SUBREG (RORX64ri GR64:$src, imm:$shamt), sub_16bit)>;
+
+def : Pat<(i32 (trunc (srl GR64:$src, (i8 immle64_32:$shamt,
+ (EXTRACT_SUBREG (RORX64ri GR64:$src, imm:$shamt), sub_32bit)>;
+def : Pat<(i32 (trunc (sra GR64:$src, (i8 immle64_32:$shamt,
+ (EXTRACT_SUBREG (RORX64ri GR64:$src, imm:$shamt), sub_32bit)>;
+
+
+// Can't expand the load
+def : Pat<(i8 (trunc (srl (loadi32 addr:$src), (i8 immle32_8:$shamt,
+ (EXTRACT_SUBREG (RORX32mi addr:$src, imm:$shamt), sub_8bit)>;
+def : Pat<(i8 (trunc (sra (loadi32 addr:$src), (i8 immle32_8:$shamt,
+ (EXTRACT_SUBREG (RORX32mi addr:$src, imm:$shamt), sub_8bit)>;
+def : Pat<(i8 (trunc (srl (loadi64 addr:$src), (i8 immle64_8:$shamt,
+ (EXTRACT_SUBREG (RORX64mi addr:$src, imm:$shamt), sub_8bit)>;
+def : Pat<(i8 (trunc (sra (loadi64 addr:$src), (i8 immle64_8:$shamt,
+ (EXTRACT_SUBREG (RORX64mi addr:$src, imm:$shamt), sub_8bit)>;
+
+
+def : Pat<(i16 (trunc (srl (loadi32 addr:$src), (i8 immle32_16:$shamt,
+ (EXTRACT_SUBREG (RORX32mi addr:$src, imm:$shamt), sub_16bit)>;
+def : Pat<(i16 (trunc (sra (loadi32 addr:$src), (i8 immle32_16:$shamt,
+ (EXTRACT_SUBREG (RORX32mi addr:$src, imm:$shamt), sub_16bit)>;
+def : Pat<(i16 (trunc (srl (loadi64 addr:$src), (i8 immle64_16:$shamt,
+ (EXTRACT_SUBREG (RORX64mi addr:$src, imm:$shamt), sub_16bit)>;
+def : Pat<(i16 (trunc (sra (loadi64 addr:$src), (i8 immle64_16:$shamt,
+ (EXTRACT_SUBREG (RORX64mi addr:$src, imm:$shamt), sub_16bit)>;
+
+def : Pat<(i32 (trunc