https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/114971
>From 3fd27bd1405a8b2c068786a200d610b9cacb65ef Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <wangpengcheng...@bytedance.com> Date: Tue, 5 Nov 2024 20:38:44 +0800 Subject: [PATCH 1/4] Set max bytes Created using spr 1.3.6-beta.1 --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index c65feb9755633..a1c5f76bae009 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2508,7 +2508,10 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { Options.LoadSizes = {4, 2, 1}; if (IsZeroCmp && ST->hasVInstructions()) { unsigned VLenB = ST->getRealMinVLen() / 8; - for (unsigned Size = ST->getXLen() / 8 + 1; + // The minimum size should be the maximum bytes between `VLen * LMUL_MF8` + // and `XLen + 8`. + unsigned MinSize = std::max(VLenB / 8, ST->getXLen() / 8 + 1); + for (unsigned Size = MinSize; Size <= VLenB * ST->getMaxLMULForFixedLengthVectors(); Size++) Options.LoadSizes.insert(Options.LoadSizes.begin(), Size); } >From 17115212f1d7af68f5374896d1ddadf464b2bc11 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <wangpengcheng...@bytedance.com> Date: Fri, 13 Jun 2025 18:24:15 +0800 Subject: [PATCH 2/4] Change to XLen + 1 Created using spr 1.3.6-beta.1 --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 4 +- llvm/test/CodeGen/RISCV/memcmp-optsize.ll | 324 +++++++++++++++--- llvm/test/CodeGen/RISCV/memcmp.ll | 324 +++++++++++++++--- 3 files changed, 570 insertions(+), 82 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 4b9ea30a92c99..3aa0fcbb723a1 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2956,8 +2956,8 @@ RISCVTTIImpl::enableMemCmpExpansion(bool 
OptSize, bool IsZeroCmp) const { if (IsZeroCmp && ST->hasVInstructions()) { unsigned VLenB = ST->getRealMinVLen() / 8; // The minimum size should be the maximum bytes between `VLen * LMUL_MF8` - // and `XLen * 2`. - unsigned MinSize = std::max(VLenB / 8, ST->getXLen() * 2 / 8); + // and `XLen + 1`. + unsigned MinSize = std::max(VLenB / 8, ST->getXLen() / 8 + 1); for (unsigned Size = MinSize; Size <= VLenB * ST->getMaxLMULForFixedLengthVectors(); Size++) Options.LoadSizes.insert(Options.LoadSizes.begin(), Size); diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll index d4d12a932d0ec..0d57e4201512e 100644 --- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll +++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll @@ -517,17 +517,99 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_5: -; CHECK-UNALIGNED: # %bb.0: # %entry -; CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lbu a0, 4(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lbu a1, 4(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lbu a0, 4(a0) +; 
CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw 
a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 5) ret i32 %bcmp @@ -614,17 +696,99 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_6: -; CHECK-UNALIGNED: # %bb.0: # %entry -; CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lhu a0, 4(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lhu a1, 4(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; 
CHECK-UNALIGNED-RV32-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; 
CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = 
call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 6) ret i32 %bcmp @@ -711,17 +875,99 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_7: -; CHECK-UNALIGNED: # %bb.0: # %entry -; CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lw a0, 3(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lw a1, 3(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: 
or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 7, e8, mf2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; 
CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 7) ret i32 %bcmp @@ -1079,13 +1325,11 @@ define i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_15: ; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 0(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 7(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 0(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 7(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV64-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v8 ; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll index f57dc207c625e..0caab1f5ce2f0 100644 --- a/llvm/test/CodeGen/RISCV/memcmp.ll +++ b/llvm/test/CodeGen/RISCV/memcmp.ll @@ -517,17 +517,99 @@ define i32 @bcmp_size_5(ptr %s1, ptr %s2) nounwind { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_5: -; CHECK-UNALIGNED: # %bb.0: # %entry -; 
CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lbu a0, 4(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lbu a1, 4(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 4(a0) +; 
CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_5: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lbu a0, 4(a0) 
+; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 5) ret i32 %bcmp @@ -614,17 +696,99 @@ define i32 @bcmp_size_6(ptr %s1, ptr %s2) nounwind { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_6: -; CHECK-UNALIGNED: # %bb.0: # %entry -; CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lhu a0, 4(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lhu a1, 4(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # 
%entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: 
ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 6, e8, mf2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_6: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 6) ret i32 %bcmp @@ -711,17 +875,99 @@ define i32 @bcmp_size_7(ptr %s1, ptr %s2) nounwind { ; CHECK-ALIGNED-RV64-V-NEXT: addi sp, sp, 16 ; CHECK-ALIGNED-RV64-V-NEXT: ret ; -; CHECK-UNALIGNED-LABEL: bcmp_size_7: -; CHECK-UNALIGNED: # %bb.0: # %entry -; CHECK-UNALIGNED-NEXT: lw a2, 0(a0) -; CHECK-UNALIGNED-NEXT: lw a0, 3(a0) -; CHECK-UNALIGNED-NEXT: lw a3, 0(a1) -; CHECK-UNALIGNED-NEXT: lw a1, 3(a1) -; CHECK-UNALIGNED-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-NEXT: or a0, a2, a0 -; CHECK-UNALIGNED-NEXT: snez a0, a0 -; CHECK-UNALIGNED-NEXT: ret +; CHECK-UNALIGNED-RV32-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-NEXT: or a0, a2, a0 +; 
CHECK-UNALIGNED-RV32-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-NEXT: ret +; +; CHECK-UNALIGNED-RV64-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-ZBKB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 +; 
CHECK-UNALIGNED-RV32-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV64-ZBKB-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: ret +; +; CHECK-UNALIGNED-RV32-V-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV32-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV32-V-NEXT: vsetivli zero, 7, e8, mf2, ta, ma +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV32-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV32-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV32-V-NEXT: vcpop.m a0, v8 +; CHECK-UNALIGNED-RV32-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV32-V-NEXT: ret +; +; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_7: +; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry +; CHECK-UNALIGNED-RV64-V-NEXT: lw a2, 0(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a0, 3(a0) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a3, 0(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: lw a1, 3(a1) +; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 +; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 +; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: %bcmp = call signext i32 @bcmp(ptr %s1, ptr %s2, iXLen 7) ret i32 %bcmp @@ -1079,13 +1325,11 @@ define i32 @bcmp_size_15(ptr %s1, ptr %s2) nounwind { ; ; CHECK-UNALIGNED-RV64-V-LABEL: bcmp_size_15: ; CHECK-UNALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-V-NEXT: ld a2, 0(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a0, 7(a0) -; CHECK-UNALIGNED-RV64-V-NEXT: ld a3, 0(a1) -; 
CHECK-UNALIGNED-RV64-V-NEXT: ld a1, 7(a1) -; CHECK-UNALIGNED-RV64-V-NEXT: xor a2, a2, a3 -; CHECK-UNALIGNED-RV64-V-NEXT: xor a0, a0, a1 -; CHECK-UNALIGNED-RV64-V-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-V-NEXT: vsetivli zero, 15, e8, m1, ta, ma +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v8, (a0) +; CHECK-UNALIGNED-RV64-V-NEXT: vle8.v v9, (a1) +; CHECK-UNALIGNED-RV64-V-NEXT: vmsne.vv v8, v8, v9 +; CHECK-UNALIGNED-RV64-V-NEXT: vcpop.m a0, v8 ; CHECK-UNALIGNED-RV64-V-NEXT: snez a0, a0 ; CHECK-UNALIGNED-RV64-V-NEXT: ret entry: >From 3fb6d8a32231e8b77f926eda89886f47d77c3e1d Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <wangpengcheng...@bytedance.com> Date: Mon, 16 Jun 2025 12:48:40 +0800 Subject: [PATCH 3/4] MinSize = XLen + 1 Created using spr 1.3.6-beta.1 --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 3aa0fcbb723a1..1b0e95df6a79e 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2955,9 +2955,8 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { if (IsZeroCmp && ST->hasVInstructions()) { unsigned VLenB = ST->getRealMinVLen() / 8; - // The minimum size should be the maximum bytes between `VLen * LMUL_MF8` - // and `XLen + 1`. - unsigned MinSize = std::max(VLenB / 8, ST->getXLen() / 8 + 1); + // The minimum size should be `XLen + 1`. 
+ unsigned MinSize = ST->getXLen() / 8 + 1; for (unsigned Size = MinSize; Size <= VLenB * ST->getMaxLMULForFixedLengthVectors(); Size++) Options.LoadSizes.insert(Options.LoadSizes.begin(), Size); >From bd11a42f765876d5861da83a096def58731a678e Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <wangpengcheng...@bytedance.com> Date: Tue, 17 Jun 2025 12:04:26 +0800 Subject: [PATCH 4/4] Update comment and simplify code Created using spr 1.3.6-beta.1 --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 4db1bb600a96a..c19a91c0a0ace 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2995,10 +2995,11 @@ RISCVTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { if (IsZeroCmp && ST->hasVInstructions()) { unsigned VLenB = ST->getRealMinVLen() / 8; - // The minimum size should be `XLen / 8 + 1`. + // The minimum size should be `XLen / 8 + 1`, and the maximum size should be + // `VLenB * MaxLMUL` so that it fits in a single register group. unsigned MinSize = ST->getXLen() / 8 + 1; - for (unsigned Size = MinSize; - Size <= VLenB * ST->getMaxLMULForFixedLengthVectors(); Size++) + unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors(); + for (unsigned Size = MinSize; Size <= MaxSize; Size++) Options.LoadSizes.insert(Options.LoadSizes.begin(), Size); } return Options; _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits