================
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -force-streaming -mtriple=aarch64-none-linux-gnu -mattr=+sme2p3 < %s | FileCheck %s
+
+target triple = "aarch64-none-linux-gnu"
+
+define <vscale x 16 x i8> @luti6_zt_i8(<vscale x 16 x i8> %x) #0 {
+; CHECK-LABEL: luti6_zt_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: luti6 z0.b, zt0, z0
+; CHECK-NEXT: ret
+ %res = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(
+ i32 0, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %res
+}
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
+ <vscale x 16 x i8> } @luti6_zt_i8_x4(<vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: luti6_zt_i8_x4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: luti6 { z0.b - z3.b }, zt0, { z0 - z2 }
+; CHECK-NEXT: ret
+ %res = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>,
+ <vscale x 16 x i8>, <vscale x 16 x i8> }
+ @llvm.aarch64.sme.luti6.zt.x4(
+ i32 0, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %c)
+ ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
+ <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+ <vscale x 8 x i16> } @luti6_i16_x4(<vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b,
+ <vscale x 16 x i8> %x,
+ <vscale x 16 x i8> %y) #0 {
+; CHECK-LABEL: luti6_i16_x4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[1]
+; CHECK-NEXT: ret
+ %res = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>,
+ <vscale x 8 x i16>, <vscale x 8 x i16> }
+ @llvm.aarch64.sme.luti6.lane.x4.nxv8i16(
+ <vscale x 8 x i16> %a, <vscale x 8 x i16> %b,
+ <vscale x 16 x i8> %x, <vscale x 16 x i8> %y, i32 1)
+ ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+ <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat> } @luti6_bf16_x4(<vscale x 8 x bfloat> %a,
+ <vscale x 8 x bfloat> %b,
+ <vscale x 16 x i8> %x,
+ <vscale x 16 x i8> %y) #0 {
+; CHECK-LABEL: luti6_bf16_x4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[0]
+; CHECK-NEXT: ret
+ %res = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }
+ @llvm.aarch64.sme.luti6.lane.x4.nxv8bf16(
+ <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b,
+ <vscale x 16 x i8> %x, <vscale x 16 x i8> %y, i32 0)
+ ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat> } %res
+}
+
+define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>,
+ <vscale x 8 x half> } @luti6_f16_x4(<vscale x 8 x half> %a,
+ <vscale x 8 x half> %b,
+ <vscale x 16 x i8> %x,
+ <vscale x 16 x i8> %y) #0 {
+; CHECK-LABEL: luti6_f16_x4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[1]
+; CHECK-NEXT: ret
+ %res = tail call { <vscale x 8 x half>, <vscale x 8 x half>,
+ <vscale x 8 x half>, <vscale x 8 x half> }
+ @llvm.aarch64.sme.luti6.lane.x4.nxv8f16(
+ <vscale x 8 x half> %a, <vscale x 8 x half> %b,
+ <vscale x 16 x i8> %x, <vscale x 16 x i8> %y, i32 1)
+ ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>,
+ <vscale x 8 x half> } %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(
+ i32, <vscale x 16 x i8>)
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
+ <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(
+ i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+ <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.nxv8i16(
+ <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 16 x i8>,
+ <vscale x 16 x i8>, i32)
+declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat> } @llvm.aarch64.sme.luti6.lane.x4.nxv8bf16(
+ <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 16 x i8>,
+ <vscale x 16 x i8>, i32)
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>,
+ <vscale x 8 x half> } @llvm.aarch64.sme.luti6.lane.x4.nxv8f16(
+ <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 16 x i8>,
+ <vscale x 16 x i8>, i32)
----------------
jthackray wrote:
Fixed.
https://github.com/llvm/llvm-project/pull/187046
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits