================
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -force-streaming -mtriple=aarch64-none-linux-gnu -mattr=+sme2p3 < %s | FileCheck %s
+
+target triple = "aarch64-none-linux-gnu"
+
+define <vscale x 16 x i8> @luti6_zt_i8(<vscale x 16 x i8> %x) #0 {
+; CHECK-LABEL: luti6_zt_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: luti6 z0.b, zt0, z0
+; CHECK-NEXT: ret
+ %res = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(
+ i32 0, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %res
+}
+
+define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
+ <vscale x 16 x i8> } @luti6_zt_i8_x4(<vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: luti6_zt_i8_x4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: luti6 { z0.b - z3.b }, zt0, { z0 - z2 }
+; CHECK-NEXT: ret
+ %res = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>,
+ <vscale x 16 x i8>, <vscale x 16 x i8> }
+ @llvm.aarch64.sme.luti6.zt.x4(
+ i32 0, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %c)
+ ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
+ <vscale x 16 x i8> } %res
+}
+
+define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+ <vscale x 8 x i16> } @luti6_i16_x4(<vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b,
+ <vscale x 16 x i8> %x,
+ <vscale x 16 x i8> %y) #0 {
+; CHECK-LABEL: luti6_i16_x4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[1]
+; CHECK-NEXT: ret
+ %res = tail call { <vscale x 8 x i16>, <vscale x 8 x i16>,
+ <vscale x 8 x i16>, <vscale x 8 x i16> }
+ @llvm.aarch64.sme.luti6.lane.x4.nxv8i16(
+ <vscale x 8 x i16> %a, <vscale x 8 x i16> %b,
+ <vscale x 16 x i8> %x, <vscale x 16 x i8> %y, i32 1)
+ ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+ <vscale x 8 x i16> } %res
+}
+
+define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat> } @luti6_bf16_x4(<vscale x 8 x bfloat> %a,
+ <vscale x 8 x bfloat> %b,
+ <vscale x 16 x i8> %x,
+ <vscale x 16 x i8> %y) #0 {
+; CHECK-LABEL: luti6_bf16_x4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[0]
+; CHECK-NEXT: ret
+ %res = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>, <vscale x 8 x bfloat> }
+ @llvm.aarch64.sme.luti6.lane.x4.nxv8bf16(
+ <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b,
+ <vscale x 16 x i8> %x, <vscale x 16 x i8> %y, i32 0)
+ ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat> } %res
+}
+
+define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>,
+ <vscale x 8 x half> } @luti6_f16_x4(<vscale x 8 x half> %a,
+ <vscale x 8 x half> %b,
+ <vscale x 16 x i8> %x,
+ <vscale x 16 x i8> %y) #0 {
+; CHECK-LABEL: luti6_f16_x4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: luti6 { z0.h - z3.h }, { z0.h, z1.h }, { z2, z3 }[1]
+; CHECK-NEXT: ret
+ %res = tail call { <vscale x 8 x half>, <vscale x 8 x half>,
+ <vscale x 8 x half>, <vscale x 8 x half> }
+ @llvm.aarch64.sme.luti6.lane.x4.nxv8f16(
+ <vscale x 8 x half> %a, <vscale x 8 x half> %b,
+ <vscale x 16 x i8> %x, <vscale x 16 x i8> %y, i32 1)
+ ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>,
+ <vscale x 8 x half> } %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sme.luti6.zt(
+ i32, <vscale x 16 x i8>)
+declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>,
+ <vscale x 16 x i8> } @llvm.aarch64.sme.luti6.zt.x4(
+ i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>,
+ <vscale x 8 x i16> } @llvm.aarch64.sme.luti6.lane.x4.nxv8i16(
+ <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 16 x i8>,
+ <vscale x 16 x i8>, i32)
+declare { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat> } @llvm.aarch64.sme.luti6.lane.x4.nxv8bf16(
+ <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 16 x i8>,
+ <vscale x 16 x i8>, i32)
+declare { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>,
+ <vscale x 8 x half> } @llvm.aarch64.sme.luti6.lane.x4.nxv8f16(
+ <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 16 x i8>,
+ <vscale x 16 x i8>, i32)
----------------
jthackray wrote:
Fixed.
https://github.com/llvm/llvm-project/pull/187046
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits