SixWeining created this revision.
SixWeining added reviewers: xen0n, xry111, MaskRay, rengolin, benshi001,
wangleiat.
Herald added subscribers: hiraditya, kristof.beyls.
Herald added a project: All.
SixWeining requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.
Some CPUs do not allow memory accesses to be unaligned, e.g. the 2k1000la,
which uses the la264 core on which a misaligned access will trigger an
exception.
In this patch, a backend feature called `ual` is defined to describe
whether the CPU supports unaligned memory accesses. And this feature
can be toggled by clang options `-m[no-]unaligned-access` or the
aliases `-m[no-]strict-align`. When this feature is on,
`allowsMisalignedMemoryAccesses` sets the speed number to 1 and returns
true, which allows the codegen to generate unaligned memory access insns.
Clang options `-m[no-]unaligned-access` are moved from `m_arm_Features_Group`
to `m_Group` because now more than one target uses them. In addition,
to keep compatible with gcc, a new alias `-mno-strict-align` is added
which is equal to `-munaligned-access`.
The feature name `ual` is consistent with linux kernel [1] and the
output of `lscpu` or `/proc/cpuinfo` [2].
There is an `LLT` variant of `allowsMisalignedMemoryAccesses`, but
it seems that currently it is only used in GlobalISel, which LoongArch
doesn't support yet. So this variant is not implemented in this patch.
[1]:
https://github.com/torvalds/linux/blob/master/arch/loongarch/include/asm/cpu.h#L77
[2]:
https://github.com/torvalds/linux/blob/master/arch/loongarch/kernel/proc.c#L75
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D149946
Files:
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
clang/test/Driver/loongarch-default-features.c
clang/test/Driver/loongarch-march.c
clang/test/Driver/loongarch-mdouble-float.c
clang/test/Driver/loongarch-mfpu.c
clang/test/Driver/loongarch-msingle-float.c
clang/test/Driver/loongarch-msoft-float.c
clang/test/Driver/loongarch-munaligned-access.c
llvm/include/llvm/TargetParser/LoongArchTargetParser.def
llvm/include/llvm/TargetParser/LoongArchTargetParser.h
llvm/lib/Target/LoongArch/LoongArch.td
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
llvm/lib/Target/LoongArch/LoongArchSubtarget.h
llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
llvm/test/CodeGen/LoongArch/tail-calls.ll
llvm/test/CodeGen/LoongArch/unaligned-access.ll
llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll
Index: llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/LoongArch/unaligned-memcpy-inline.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64
+
+; Small (16 bytes here) unaligned memcpy() should be a function call if
+; ual is turned off.
+define void @t0(ptr %out, ptr %in) {
+; LA32-LABEL: t0:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: .cfi_def_cfa_offset 16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: .cfi_offset 1, -4
+; LA32-NEXT: ori $a2, $zero, 16
+; LA32-NEXT: bl %plt(memcpy)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: t0:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: .cfi_def_cfa_offset 16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: .cfi_offset 1, -8
+; LA64-NEXT: ori $a2, $zero, 16
+; LA64-NEXT: bl %plt(memcpy)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+entry:
+ call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false)
+ ret void
+}
+
+; Small (16 bytes here) aligned memcpy() should be inlined even if
+; ual is turned off.
+define void @t1(ptr align 8 %out, ptr align 8 %in) {
+; LA32-LABEL: t1:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.w $a2, $a1, 12
+; LA32-NEXT: st.w $a2, $a0, 12
+; LA32-NEXT: ld.w $a2, $a1, 8
+; LA32-NEXT: st.w $a2, $a0, 8
+; LA32-NEXT: ld.w $a2, $a1, 4
+; LA32-NEXT: st.w $a2, $a0, 4
+; LA32-NEXT: ld.w $a1, $a1, 0
+; LA32-NEXT: st.w $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: t1:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.d $a2, $a1, 8
+; LA64-NEXT: st.d $a2, $a0, 8
+; LA64-NEXT: ld.d $a1, $a1, 0
+; LA64-NEXT: st.d $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ call void @llvm.memcpy.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false)
+ ret void
+}
+
+; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte sized
+; loads and stores if ual is turned off.
+define void @t2(ptr %out, ptr %in) {
+; LA32-LABEL: t2:
+; LA32: # %bb.0: # %entry
+; LA32-NEXT: ld.b $a2, $a1, 3
+; LA32-NEXT: st.b $a2, $a0, 3
+; LA32-NEXT: ld.b $a2, $a1, 2
+; LA32-NEXT: st.b $a2, $a0, 2
+; LA32-NEXT: ld.b $a2, $a1, 1
+; LA32-NEXT: st.b $a2, $a0, 1
+; LA32-NEXT: ld.b $a1, $a1, 0
+; LA32-NEXT: st.b $a1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: t2:
+; LA64: # %bb.0: # %entry
+; LA64-NEXT: ld.b $a2, $a1, 3
+; LA64-NEXT: st.b $a2, $a0, 3
+; LA64-NEXT: ld.b $a2, $a1, 2
+; LA64-NEXT: st.b $a2, $a0, 2
+; LA64-NEXT: ld.b $a2, $a1, 1
+; LA64-NEXT: st.b $a2, $a0, 1
+; LA64-NEXT: ld.b $a1, $a1, 0
+; LA64-NEXT: st.b $a1, $a0, 0
+; LA64-NEXT: ret
+entry:
+ call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)
Index: llvm/test/CodeGen/LoongArch/unaligned-access.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/LoongArch/unaligned-access.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32-ALIGNED
+; RUN: llc --mtriple=loongarch32 --mattr=+ual < %s | FileCheck %s --check-prefix=LA32-UNALIGNED
+; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32-ALIGNED
+
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64-UNALIGNED
+; RUN: llc --mtriple=loongarch64 --mattr=+ual < %s | FileCheck %s --check-prefix=LA64-UNALIGNED
+; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64-ALIGNED
+
+define i32 @f0(ptr %p) nounwind {
+; LA32-ALIGNED-LABEL: f0:
+; LA32-ALIGNED: # %bb.0:
+; LA32-ALIGNED-NEXT: ld.hu $a1, $a0, 0
+; LA32-ALIGNED-NEXT: ld.hu $a0, $a0, 2
+; LA32-ALIGNED-NEXT: slli.w $a0, $a0, 16
+; LA32-ALIGNED-NEXT: or $a0, $a0, $a1
+; LA32-ALIGNED-NEXT: ret
+;
+; LA32-UNALIGNED-LABEL: f0:
+; LA32-UNALIGNED: # %bb.0:
+; LA32-UNALIGNED-NEXT: ld.w $a0, $a0, 0
+; LA32-UNALIGNED-NEXT: ret
+;
+; LA64-UNALIGNED-LABEL: f0:
+; LA64-UNALIGNED: # %bb.0:
+; LA64-UNALIGNED-NEXT: ld.w $a0, $a0, 0
+; LA64-UNALIGNED-NEXT: ret
+;
+; LA64-ALIGNED-LABEL: f0:
+; LA64-ALIGNED: # %bb.0:
+; LA64-ALIGNED-NEXT: ld.hu $a1, $a0, 0
+; LA64-ALIGNED-NEXT: ld.h $a0, $a0, 2
+; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 16
+; LA64-ALIGNED-NEXT: or $a0, $a0, $a1
+; LA64-ALIGNED-NEXT: ret
+ %tmp = load i32, ptr %p, align 2
+ ret i32 %tmp
+}
+
+define i64 @f1(ptr %p) nounwind {
+; LA32-ALIGNED-LABEL: f1:
+; LA32-ALIGNED: # %bb.0:
+; LA32-ALIGNED-NEXT: ld.w $a2, $a0, 0
+; LA32-ALIGNED-NEXT: ld.w $a1, $a0, 4
+; LA32-ALIGNED-NEXT: move $a0, $a2
+; LA32-ALIGNED-NEXT: ret
+;
+; LA32-UNALIGNED-LABEL: f1:
+; LA32-UNALIGNED: # %bb.0:
+; LA32-UNALIGNED-NEXT: ld.w $a2, $a0, 0
+; LA32-UNALIGNED-NEXT: ld.w $a1, $a0, 4
+; LA32-UNALIGNED-NEXT: move $a0, $a2
+; LA32-UNALIGNED-NEXT: ret
+;
+; LA64-UNALIGNED-LABEL: f1:
+; LA64-UNALIGNED: # %bb.0:
+; LA64-UNALIGNED-NEXT: ld.d $a0, $a0, 0
+; LA64-UNALIGNED-NEXT: ret
+;
+; LA64-ALIGNED-LABEL: f1:
+; LA64-ALIGNED: # %bb.0:
+; LA64-ALIGNED-NEXT: ld.wu $a1, $a0, 0
+; LA64-ALIGNED-NEXT: ld.wu $a0, $a0, 4
+; LA64-ALIGNED-NEXT: slli.d $a0, $a0, 32
+; LA64-ALIGNED-NEXT: or $a0, $a0, $a1
+; LA64-ALIGNED-NEXT: ret
+ %tmp = load i64, ptr %p, align 4
+ ret i64 %tmp
+}
Index: llvm/test/CodeGen/LoongArch/tail-calls.ll
===================================================================
--- llvm/test/CodeGen/LoongArch/tail-calls.ll
+++ llvm/test/CodeGen/LoongArch/tail-calls.ll
@@ -13,6 +13,7 @@
}
;; Perform tail call optimization for external symbol.
+;; Bytes copied should be large enough, otherwise the memcpy call would be optimized to multiple ld/st insns.
@dest = global [2 x i8] zeroinitializer
declare void @llvm.memcpy.p0i8.p0i8.i32(ptr, ptr, i32, i1)
define void @caller_extern(ptr %src) optsize {
@@ -21,10 +22,10 @@
; CHECK-NEXT: move $a1, $a0
; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(dest)
; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(dest)
-; CHECK-NEXT: ori $a2, $zero, 7
+; CHECK-NEXT: ori $a2, $zero, 33
; CHECK-NEXT: b %plt(memcpy)
entry:
- tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 7, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(ptr getelementptr inbounds ([2 x i8], ptr @dest, i32 0, i32 0), ptr %src, i32 33, i1 false)
ret void
}
Index: llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
===================================================================
--- llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
+++ llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll
@@ -315,10 +315,7 @@
; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
-; LA64F-NEXT: ld.wu $a0, $a0, 0
-; LA64F-NEXT: ld.wu $a1, $fp, 4
-; LA64F-NEXT: slli.d $a1, $a1, 32
-; LA64F-NEXT: or $a0, $a1, $a0
+; LA64F-NEXT: ld.d $a0, $a0, 0
; LA64F-NEXT: ori $s0, $zero, 8
; LA64F-NEXT: addi.d $s1, $sp, 8
; LA64F-NEXT: addi.d $s2, $sp, 0
@@ -360,11 +357,7 @@
; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT: move $fp, $a0
-; LA64D-NEXT: ld.wu $a0, $a0, 0
-; LA64D-NEXT: ld.wu $a1, $fp, 4
-; LA64D-NEXT: slli.d $a1, $a1, 32
-; LA64D-NEXT: or $a0, $a1, $a0
-; LA64D-NEXT: movgr2fr.d $fa0, $a0
+; LA64D-NEXT: fld.d $fa0, $a0, 0
; LA64D-NEXT: addi.d $a0, $zero, 1
; LA64D-NEXT: movgr2fr.d $fs0, $a0
; LA64D-NEXT: ori $s0, $zero, 8
@@ -411,10 +404,7 @@
; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
-; LA64F-NEXT: ld.wu $a0, $a0, 0
-; LA64F-NEXT: ld.wu $a1, $fp, 4
-; LA64F-NEXT: slli.d $a1, $a1, 32
-; LA64F-NEXT: or $a0, $a1, $a0
+; LA64F-NEXT: ld.d $a0, $a0, 0
; LA64F-NEXT: ori $s0, $zero, 8
; LA64F-NEXT: addi.d $s1, $sp, 8
; LA64F-NEXT: addi.d $s2, $sp, 0
@@ -456,11 +446,7 @@
; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT: move $fp, $a0
-; LA64D-NEXT: ld.wu $a0, $a0, 0
-; LA64D-NEXT: ld.wu $a1, $fp, 4
-; LA64D-NEXT: slli.d $a1, $a1, 32
-; LA64D-NEXT: or $a0, $a1, $a0
-; LA64D-NEXT: movgr2fr.d $fa0, $a0
+; LA64D-NEXT: fld.d $fa0, $a0, 0
; LA64D-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
; LA64D-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI5_0)
; LA64D-NEXT: fld.d $fs0, $a0, 0
@@ -507,10 +493,7 @@
; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
-; LA64F-NEXT: ld.wu $a0, $a0, 0
-; LA64F-NEXT: ld.wu $a1, $fp, 4
-; LA64F-NEXT: slli.d $a1, $a1, 32
-; LA64F-NEXT: or $a0, $a1, $a0
+; LA64F-NEXT: ld.d $a0, $a0, 0
; LA64F-NEXT: ori $s0, $zero, 8
; LA64F-NEXT: addi.d $s1, $sp, 8
; LA64F-NEXT: addi.d $s2, $sp, 0
@@ -552,11 +535,7 @@
; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT: move $fp, $a0
-; LA64D-NEXT: ld.wu $a0, $a0, 0
-; LA64D-NEXT: ld.wu $a1, $fp, 4
-; LA64D-NEXT: slli.d $a1, $a1, 32
-; LA64D-NEXT: or $a0, $a1, $a0
-; LA64D-NEXT: movgr2fr.d $fa0, $a0
+; LA64D-NEXT: fld.d $fa0, $a0, 0
; LA64D-NEXT: addi.d $a0, $zero, 1
; LA64D-NEXT: movgr2fr.d $fs0, $a0
; LA64D-NEXT: ori $s0, $zero, 8
@@ -604,10 +583,7 @@
; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill
; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill
; LA64F-NEXT: move $fp, $a0
-; LA64F-NEXT: ld.wu $a0, $a0, 0
-; LA64F-NEXT: ld.wu $a1, $fp, 4
-; LA64F-NEXT: slli.d $a1, $a1, 32
-; LA64F-NEXT: or $a0, $a1, $a0
+; LA64F-NEXT: ld.d $a0, $a0, 0
; LA64F-NEXT: ori $s0, $zero, 8
; LA64F-NEXT: addi.d $s1, $sp, 8
; LA64F-NEXT: addi.d $s2, $sp, 0
@@ -649,11 +625,7 @@
; LA64D-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill
; LA64D-NEXT: fst.d $fs0, $sp, 24 # 8-byte Folded Spill
; LA64D-NEXT: move $fp, $a0
-; LA64D-NEXT: ld.wu $a0, $a0, 0
-; LA64D-NEXT: ld.wu $a1, $fp, 4
-; LA64D-NEXT: slli.d $a1, $a1, 32
-; LA64D-NEXT: or $a0, $a1, $a0
-; LA64D-NEXT: movgr2fr.d $fa0, $a0
+; LA64D-NEXT: fld.d $fa0, $a0, 0
; LA64D-NEXT: addi.d $a0, $zero, 1
; LA64D-NEXT: movgr2fr.d $fs0, $a0
; LA64D-NEXT: ori $s0, $zero, 8
Index: llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
===================================================================
--- llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
+++ llvm/test/CodeGen/LoongArch/calling-conv-lp64d.ll
@@ -357,17 +357,13 @@
; CHECK-LABEL: callee_large_struct_ret:
; CHECK: # %bb.0:
; CHECK-NEXT: ori $a1, $zero, 4
-; CHECK-NEXT: st.w $a1, $a0, 24
+; CHECK-NEXT: st.d $a1, $a0, 24
; CHECK-NEXT: ori $a1, $zero, 3
-; CHECK-NEXT: st.w $a1, $a0, 16
+; CHECK-NEXT: st.d $a1, $a0, 16
; CHECK-NEXT: ori $a1, $zero, 2
-; CHECK-NEXT: st.w $a1, $a0, 8
-; CHECK-NEXT: st.w $zero, $a0, 28
-; CHECK-NEXT: st.w $zero, $a0, 20
-; CHECK-NEXT: st.w $zero, $a0, 12
-; CHECK-NEXT: st.w $zero, $a0, 4
+; CHECK-NEXT: st.d $a1, $a0, 8
; CHECK-NEXT: ori $a1, $zero, 1
-; CHECK-NEXT: st.w $a1, $a0, 0
+; CHECK-NEXT: st.d $a1, $a0, 0
; CHECK-NEXT: ret
%a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0
store i64 1, ptr %a, align 4
Index: llvm/lib/Target/LoongArch/LoongArchSubtarget.h
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -42,6 +42,7 @@
bool HasLaGlobalWithPcrel = false;
bool HasLaGlobalWithAbs = false;
bool HasLaLocalWithAbs = false;
+ bool HasUAL = false;
unsigned GRLen = 32;
MVT GRLenVT = MVT::i32;
LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
@@ -91,6 +92,7 @@
bool hasLaGlobalWithPcrel() const { return HasLaGlobalWithPcrel; }
bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; }
bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; }
+ bool hasUAL() const { return HasUAL; }
MVT getGRLenVT() const { return GRLenVT; }
unsigned getGRLen() const { return GRLen; }
LoongArchABI::ABI getTargetABI() const { return TargetABI; }
Index: llvm/lib/Target/LoongArch/LoongArchISelLowering.h
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -191,6 +191,11 @@
bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }
+ bool allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ unsigned *Fast = nullptr) const override;
+
private:
/// Target-specific function used to lower LoongArch calling conventions.
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
Index: llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
===================================================================
--- llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1786,6 +1786,18 @@
}
}
+bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
+ unsigned *Fast) const {
+ if (!Subtarget.hasUAL())
+ return false;
+
+ // TODO: set reasonable speed number.
+ if (Fast)
+ *Fast = 1;
+ return true;
+}
+
const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((LoongArchISD::NodeType)Opcode) {
case LoongArchISD::FIRST_NUMBER:
Index: llvm/lib/Target/LoongArch/LoongArch.td
===================================================================
--- llvm/lib/Target/LoongArch/LoongArch.td
+++ llvm/lib/Target/LoongArch/LoongArch.td
@@ -115,6 +115,11 @@
AssemblerPredicate<(all_of LaLocalWithAbs),
"Expand la.local as la.abs">;
+// Unaligned memory access
+def FeatureUAL
+ : SubtargetFeature<"ual", "HasUAL", "true",
+ "Allow memory accesses to be unaligned">;
+
//===----------------------------------------------------------------------===//
// Registers, instruction descriptions ...
//===----------------------------------------------------------------------===//
@@ -128,13 +133,14 @@
//===----------------------------------------------------------------------===//
def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>;
-def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>;
+def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>;
// Support generic for compatibility with other targets. The triple will be used
// to change to the appropriate la32/la64 version.
def : ProcessorModel<"generic", NoSchedModel, []>;
def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit,
+ FeatureUAL,
FeatureExtLASX,
FeatureExtLVZ,
FeatureExtLBT]>;
Index: llvm/include/llvm/TargetParser/LoongArchTargetParser.h
===================================================================
--- llvm/include/llvm/TargetParser/LoongArchTargetParser.h
+++ llvm/include/llvm/TargetParser/LoongArchTargetParser.h
@@ -46,6 +46,9 @@
// Loongson Virtualization Extension is available.
FK_LVZ = 1 << 7,
+
+ // Allow memory accesses to be unaligned.
+ FK_UAL = 1 << 8,
};
struct FeatureInfo {
Index: llvm/include/llvm/TargetParser/LoongArchTargetParser.def
===================================================================
--- llvm/include/llvm/TargetParser/LoongArchTargetParser.def
+++ llvm/include/llvm/TargetParser/LoongArchTargetParser.def
@@ -11,6 +11,7 @@
LOONGARCH_FEATURE("+lasx", FK_LASX)
LOONGARCH_FEATURE("+lbt", FK_LBT)
LOONGARCH_FEATURE("+lvz", FK_LVZ)
+LOONGARCH_FEATURE("+ual", FK_UAL)
#undef LOONGARCH_FEATURE
@@ -19,7 +20,7 @@
#endif
LOONGARCH_ARCH("invalid", AK_INVALID, FK_INVALID)
-LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64)
-LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX)
+LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL)
+LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL)
#undef LOONGARCH_ARCH
Index: clang/test/Driver/loongarch-munaligned-access.c
===================================================================
--- /dev/null
+++ clang/test/Driver/loongarch-munaligned-access.c
@@ -0,0 +1,61 @@
+/// Test -m[no-]unaligned-access and -m[no-]strict-align options.
+
+// RUN: %clang --target=loongarch64 -munaligned-access -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
+// RUN: %clang --target=loongarch64 -mstrict-align -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-strict-align -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
+// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
+// RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
+// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
+// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-strict-align -mno-unaligned-access -fsyntax-only %s -### 2>&1 | \
+// RUN: FileCheck %s --check-prefix=CC1-NO-UNALIGNED
+
+// RUN: %clang --target=loongarch64 -munaligned-access -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-unaligned-access -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
+// RUN: %clang --target=loongarch64 -mstrict-align -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-strict-align -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
+// RUN: %clang --target=loongarch64 -munaligned-access -mno-unaligned-access -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-unaligned-access -munaligned-access -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
+// RUN: %clang --target=loongarch64 -mstrict-align -mno-strict-align -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-strict-align -mstrict-align -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
+// RUN: %clang --target=loongarch64 -munaligned-access -mstrict-align -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
+// RUN: %clang --target=loongarch64 -mstrict-align -munaligned-access -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-unaligned-access -mno-strict-align -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-UNALIGNED
+// RUN: %clang --target=loongarch64 -mno-strict-align -mno-unaligned-access -S -emit-llvm %s -o - | \
+// RUN: FileCheck %s --check-prefix=IR-NO-UNALIGNED
+
+// CC1-UNALIGNED: "-target-feature" "+ual"
+// CC1-NO-UNALIGNED: "-target-feature" "-ual"
+
+// IR-UNALIGNED: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+ual{{(,.*)?}}"
+// IR-NO-UNALIGNED: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-ual{{(,.*)?}}"
+
+int foo(void) {
+ return 3;
+}
Index: clang/test/Driver/loongarch-msoft-float.c
===================================================================
--- clang/test/Driver/loongarch-msoft-float.c
+++ clang/test/Driver/loongarch-msoft-float.c
@@ -8,12 +8,10 @@
// WARN: warning: argument unused during compilation: '-mfpu=64'
// WARN: warning: argument unused during compilation: '-mabi=lp64d'
-// CC1-NOT: "-target-feature"
-// CC1: "-target-feature" "+64bit" "-target-feature" "-f" "-target-feature" "-d"
-// CC1-NOT: "-target-feature"
+// CC1: "-target-feature" "-f"{{.*}} "-target-feature" "-d"
// CC1: "-target-abi" "lp64s"
-// IR: attributes #[[#]] ={{.*}}"target-features"="+64bit,-d,-f"
+// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}"
int foo(void) {
return 3;
Index: clang/test/Driver/loongarch-msingle-float.c
===================================================================
--- clang/test/Driver/loongarch-msingle-float.c
+++ clang/test/Driver/loongarch-msingle-float.c
@@ -8,12 +8,10 @@
// WARN: warning: argument unused during compilation: '-mfpu=0'
// WARN: warning: argument unused during compilation: '-mabi=lp64s'
-// CC1-NOT: "-target-feature"
-// CC1: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "-d"
-// CC1-NOT: "-target-feature"
+// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "-d"
// CC1: "-target-abi" "lp64f"
-// IR: attributes #[[#]] ={{.*}}"target-features"="+64bit,+f,-d"
+// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d"
int foo(void) {
return 3;
Index: clang/test/Driver/loongarch-mfpu.c
===================================================================
--- clang/test/Driver/loongarch-mfpu.c
+++ clang/test/Driver/loongarch-mfpu.c
@@ -16,24 +16,18 @@
// RUN: %clang --target=loongarch64 -mfpu=none -S -emit-llvm %s -o - | \
// RUN: FileCheck %s --check-prefix=IR-FPU0
-// CC1-FPU64-NOT: "-target-feature"
-// CC1-FPU64: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d"
-// CC1-FPU64-NOT: "-target-feature"
+// CC1-FPU64: "-target-feature" "+f"{{.*}} "-target-feature" "+d"
// CC1-FPU64: "-target-abi" "lp64d"
-// CC1-FPU32-NOT: "-target-feature"
-// CC1-FPU32: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "-d"
-// CC1-FPU32-NOT: "-target-feature"
+// CC1-FPU32: "-target-feature" "+f"{{.*}} "-target-feature" "-d"
// CC1-FPU32: "-target-abi" "lp64f"
-// CC1-FPU0-NOT: "-target-feature"
-// CC1-FPU0: "-target-feature" "+64bit" "-target-feature" "-f" "-target-feature" "-d"
-// CC1-FPU0-NOT: "-target-feature"
+// CC1-FPU0: "-target-feature" "-f"{{.*}} "-target-feature" "-d"
// CC1-FPU0: "-target-abi" "lp64s"
-// IR-FPU64: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f"
-// IR-FPU32: attributes #[[#]] ={{.*}}"target-features"="+64bit,+f,-d"
-// IR-FPU0: attributes #[[#]] ={{.*}}"target-features"="+64bit,-d,-f"
+// IR-FPU64: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+d,{{(.*,)?}}+f{{(,.*)?}}"
+// IR-FPU32: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+f,{{(.*,)?}}-d{{(,.*)?}}"
+// IR-FPU0: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-d,{{(.*,)?}}-f{{(,.*)?}}"
int foo(void) {
return 3;
Index: clang/test/Driver/loongarch-mdouble-float.c
===================================================================
--- clang/test/Driver/loongarch-mdouble-float.c
+++ clang/test/Driver/loongarch-mdouble-float.c
@@ -8,12 +8,10 @@
// WARN: warning: argument unused during compilation: '-mfpu=0'
// WARN: warning: argument unused during compilation: '-mabi=lp64s'
-// CC1-NOT: "-target-feature"
-// CC1: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d"
-// CC1-NOT: "-target-feature"
+// CC1: "-target-feature" "+f"{{.*}} "-target-feature" "+d"
// CC1: "-target-abi" "lp64d"
-// IR: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f"
+// IR: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+d,{{(.*,)?}}+f{{(,.*)?}}"
int foo(void) {
return 3;
Index: clang/test/Driver/loongarch-march.c
===================================================================
--- clang/test/Driver/loongarch-march.c
+++ clang/test/Driver/loongarch-march.c
@@ -8,17 +8,17 @@
// RUN: FileCheck %s --check-prefix=IR-LA464
// CC1-LOONGARCH64-NOT: "-target-feature"
-// CC1-LOONGARCH64: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d"
+// CC1-LOONGARCH64: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+ual"
// CC1-LOONGARCH64-NOT: "-target-feature"
// CC1-LOONGARCH64: "-target-abi" "lp64d"
// CC1-LA464-NOT: "-target-feature"
-// CC1-LA464: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx"
+// CC1-LA464: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual"
// CC1-LA464-NOT: "-target-feature"
// CC1-LA464: "-target-abi" "lp64d"
-// IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f"
-// IR-LA464: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx"
+// IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f,+ual"
+// IR-LA464: attributes #[[#]] ={{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual"
int foo(void) {
return 3;
Index: clang/test/Driver/loongarch-default-features.c
===================================================================
--- clang/test/Driver/loongarch-default-features.c
+++ clang/test/Driver/loongarch-default-features.c
@@ -2,7 +2,7 @@
// RUN: %clang --target=loongarch64 -S -emit-llvm %s -o - | FileCheck %s --check-prefix=LA64
// LA32: "target-features"="+32bit"
-// LA64: "target-features"="+64bit,+d,+f"
+// LA64: "target-features"="+64bit,+d,+f,+ual"
int foo(void) {
return 3;
Index: clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
+++ clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "LoongArch.h"
+#include "ToolChains/CommonArgs.h"
#include "clang/Basic/DiagnosticDriver.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
@@ -133,4 +134,9 @@
D.Diag(diag::err_drv_loongarch_invalid_mfpu_EQ) << FPU;
}
}
+
+ // Select the `ual` feature determined by -m[no-]unaligned-access
+ // or the alias -m[no-]strict-align.
+ AddTargetFeature(Args, Features, options::OPT_munaligned_access,
+ options::OPT_mno_unaligned_access, "ual");
}
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -3652,12 +3652,14 @@
"to use the value implied by -march/-mcpu. Value will be reflected "
"in __riscv_v_fixed_vlen preprocessor define (RISC-V only)">;
-def munaligned_access : Flag<["-"], "munaligned-access">, Group<m_arm_Features_Group>,
- HelpText<"Allow memory accesses to be unaligned (AArch32/AArch64 only)">;
-def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group<m_arm_Features_Group>,
- HelpText<"Force all memory accesses to be aligned (AArch32/AArch64 only)">;
+def munaligned_access : Flag<["-"], "munaligned-access">, Group<m_Group>,
+ HelpText<"Allow memory accesses to be unaligned">;
+def mno_unaligned_access : Flag<["-"], "mno-unaligned-access">, Group<m_Group>,
+ HelpText<"Force all memory accesses to be aligned">;
def mstrict_align : Flag<["-"], "mstrict-align">, Alias<mno_unaligned_access>, Flags<[CC1Option,HelpHidden]>,
HelpText<"Force all memory accesses to be aligned (same as mno-unaligned-access)">;
+def mno_strict_align : Flag<["-"], "mno-strict-align">, Alias<munaligned_access>, Flags<[CC1Option,HelpHidden]>,
+ HelpText<"Allow memory accesses to be unaligned (same as munaligned-access)">;
def mno_thumb : Flag<["-"], "mno-thumb">, Group<m_arm_Features_Group>;
def mrestrict_it: Flag<["-"], "mrestrict-it">, Group<m_arm_Features_Group>,
HelpText<"Disallow generation of complex IT blocks.">;
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits