This patch series adds support for the following intrinsics implementing
FEAT_SME_TMOP. All of these require the +sme-tmop arch option.
A new intrinsic shape and a new register constraint is required. This patch
adds these, along with tests.

Specifically:
  - svtmopa_lane_za16[_mf8_mf8] (requires +sme-f8f16)
  - svtmopa_lane_za16[_bf16_bf16] (requires +sme-b16b16)
  - svtmopa_lane_za16[_f16_f16] (requires +sme-f16f16)
  - svtmopa_lane_za32[_mf8_mf8] (requires +sme-f8f32)
  - svtmopa_lane_za32[_bf16_bf16] (requires +sme-b16b16)
  - svtmopa_lane_za32[_f16_f16]
  - svtmopa_lane_za32[_f32_f32]
  - svtmopa_lane_za32[_s16_s16]
  - svtmopa_lane_za32[_u16_u16]
  - svtmopa_lane_za32[_s8_s8]
  - svtmopa_lane_za32[_u8_u8]
  - svtmopa_lane_za32[_s8_u8]
  - svtmopa_lane_za32[_u8_s8]

gcc/
        * config/aarch64/aarch64-c.cc: (__ARM_FEATURE_SME_TMOP): Add define.
        * config/aarch64/aarch64-sme.md:
        (@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>): Add
        new insn.
        (@aarch64_sme_lane_<optab><SVE_FULL_H:mode><SVE_FULL_HF:mode>):
        Likewise.
        (@aarch64_sme_lane_<optab><SVE_FULL_S:mode><SVE_FULL_BHSF:mode>):
        Likewise.
        (@aarch64_sme_lane_<optab><SME_ZA_TMOP_FP8:mode><SME_ZA_FP8_x1:mode>):
        Likewise.
        * config/aarch64/aarch64-sve-builtins-shapes.cc:
        (ternary_za_uint_dual_single): Add new shape.
        * config/aarch64/aarch64-sve-builtins-shapes.h: Likewise.
        * config/aarch64/aarch64-sve-builtins-sme.cc: (svtmopa_lane_za_impl):
        Add new function impl class.
        (svtmopa_lane_za): Add new FUNCTION.
        * config/aarch64/aarch64-sve-builtins-sme.def: (svtmopa_lane): Add new
        DEF_SME_ZA_FUNCTION_GS entries.
        * config/aarch64/aarch64-sve-builtins-sme.h: (svtmopa_lane_za): Add new.
        * config/aarch64/aarch64-sve-builtins.cc: (TYPES_tmop_base,
        TYPES_tmop_h_float, TYPES_tmop_h_bfloat, TYPES_tmop_h_mf8,
        TYPES_tmop_s_mf8): Add new DEF_SVE_TYPES_ARRAYs.
        (function_resolver::resolve_to): Fix documentation comment.
        * config/aarch64/aarch64.h: (TARGET_STREAMING_SME_TMOP): Add new define.
        * config/aarch64/constraints.md (Uwo): Add new constraint for Control
        Vector Register in TMOP operations.
        * config/aarch64/iterators.md: (SVE_FULL_H, SVE_FULL_BHSF,
        SME_ZA_TMOP_FP8): Add new mode iterators.
        (UNSPEC_SME_FTMOPA, UNSPEC_SME_FTMOPA_FP8, UNSPEC_SME_STMOPA,
        UNSPEC_SME_SUTMOPA, UNSPEC_SME_USTMOPA, UNSPEC_SME_UTMOPA): Add new
        unspecs.
        (SME_TMOP_INT, SME_TMOP_FP, SME_TMOP_FP8): Add new iterators.
        (optab): Update with new unspecs.

gcc/testsuite/
        * gcc.target/aarch64/pragma_cpp_predefs_4.c: Add tests checking that the
        sme-tmop prefef is off by default, and checks for feature dependencies.
        * gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h: (TEST_ZA_TMOP) Add
        testing macro.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_bf16_bf16.c:
        New test.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_f16_f16.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_mf8_mf8.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_bf16_bf16.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_f16_f16.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_f32_f32.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_mf8_mf8.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s8_s8.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s8_u8.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s16_s16.c:
        Likewise.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u8_s8.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u8_u8.c: Likewise.
        * gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u16_u16.c:
        Likewise.
        * gcc.target/aarch64/sve/acle/general-c/ternary_za_uint_dual_single_1.c:
        Likewise.
        * lib/target-supports.exp: Add check_effective_target support for
        sme-tmop.
---
 gcc/config/aarch64/aarch64-c.cc               |   2 +
 gcc/config/aarch64/aarch64-sme.md             | 102 ++++++++++++++++++
 .../aarch64/aarch64-sve-builtins-shapes.cc    |  39 +++++++
 .../aarch64/aarch64-sve-builtins-shapes.h     |   1 +
 .../aarch64/aarch64-sve-builtins-sme.cc       |  34 ++++++
 .../aarch64/aarch64-sve-builtins-sme.def      |  24 +++++
 gcc/config/aarch64/aarch64-sve-builtins-sme.h |   1 +
 gcc/config/aarch64/aarch64-sve-builtins.cc    |  32 +++++-
 gcc/config/aarch64/aarch64.h                  |   3 +
 gcc/config/aarch64/constraints.md             |   4 +
 gcc/config/aarch64/iterators.md               |  26 +++++
 .../gcc.target/aarch64/pragma_cpp_predefs_4.c |  16 +++
 .../aarch64/sme2/acle-asm/test_sme2_acle.h    |  22 ++++
 .../sme2/acle-asm/tmopa_lane_za16_bf16_bf16.c |  76 +++++++++++++
 .../sme2/acle-asm/tmopa_lane_za16_f16_f16.c   |  76 +++++++++++++
 .../sme2/acle-asm/tmopa_lane_za16_mf8_mf8.c   |  83 ++++++++++++++
 .../sme2/acle-asm/tmopa_lane_za32_bf16_bf16.c |  76 +++++++++++++
 .../sme2/acle-asm/tmopa_lane_za32_f16_f16.c   |  76 +++++++++++++
 .../sme2/acle-asm/tmopa_lane_za32_f32_f32.c   |  76 +++++++++++++
 .../sme2/acle-asm/tmopa_lane_za32_mf8_mf8.c   |  83 ++++++++++++++
 .../sme2/acle-asm/tmopa_lane_za32_s16_s16.c   |  76 +++++++++++++
 .../sme2/acle-asm/tmopa_lane_za32_s8_s8.c     |  76 +++++++++++++
 .../sme2/acle-asm/tmopa_lane_za32_s8_u8.c     |  76 +++++++++++++
 .../sme2/acle-asm/tmopa_lane_za32_u16_u16.c   |  76 +++++++++++++
 .../sme2/acle-asm/tmopa_lane_za32_u8_s8.c     |  76 +++++++++++++
 .../sme2/acle-asm/tmopa_lane_za32_u8_u8.c     |  76 +++++++++++++
 .../general-c/ternary_za_uint_dual_single_1.c |  87 +++++++++++++++
 gcc/testsuite/lib/target-supports.exp         |   1 +
 28 files changed, 1395 insertions(+), 1 deletion(-)
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_bf16_bf16.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_f16_f16.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_mf8_mf8.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_bf16_bf16.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_f16_f16.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_f32_f32.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_mf8_mf8.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s16_s16.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s8_s8.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s8_u8.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u16_u16.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u8_s8.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u8_u8.c
 create mode 100644 
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_za_uint_dual_single_1.c

diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index b52ea7649f9..6f8fd1bea6b 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -308,6 +308,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
                        "__ARM_FEATURE_SME2p1", pfile);
   aarch64_def_or_undef (TARGET_FAMINMAX, "__ARM_FEATURE_FAMINMAX", pfile);
   aarch64_def_or_undef (TARGET_PCDPHINT, "__ARM_FEATURE_PCDPHINT", pfile);
+  aarch64_def_or_undef (AARCH64_HAVE_ISA (SME_TMOP),
+                       "__ARM_FEATURE_SME_TMOP", pfile);
 
   // Function multi-versioning defines
   aarch64_def_or_undef (targetm.has_ifunc_p (),
diff --git a/gcc/config/aarch64/aarch64-sme.md 
b/gcc/config/aarch64/aarch64-sme.md
index ca3ed471657..a40bb2541a1 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -44,11 +44,13 @@
 ;; ---- [INT] Dot product
 ;; ---- [INT] Ternary widening arithmetic on ZA slice
 ;; ---- [INT] Sum of outer products
+;; ---- [INT] Sparse outer product
 ;; ---- [FP] Dot product
 ;; ---- [FP8] Dot product
 ;; ---- [FP] Ternary arithmetic on ZA slice
 ;; ---- [FP] Ternary widening arithmetic on ZA slice
 ;; ---- [FP] Sum of outer products
+;; ---- [FP] Sparse outer product
 ;;
 ;; == Table lookup
 ;; ---- Table lookup
@@ -1796,6 +1798,37 @@ (define_insn 
"@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx4SI_ONLY:mode>"
   "<optab>\tza%0.s, %1/m, %2/m, %3.s, %4.s"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [INT] Sparse outer product
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - STMOPA
+;; - UTMOPA
+;; - SUTMOPA
+;; - USTMOPA
+;; -------------------------------------------------------------------------
+;; svtmopa_lane_za32[_s16_s16]
+;; svtmopa_lane_za32[_u16_u16]
+;; svtmopa_lane_za32[_s8_s8]
+;; svtmopa_lane_za32[_u8_u8]
+;; svtmopa_lane_za32[_s8_u8]
+;; svtmopa_lane_za32[_u8_s8]
+(define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>"
+  [(set (reg:VNx4SI_ONLY ZA_REGNUM)
+       (unspec:VNx4SI_ONLY
+         [(reg:VNx4SI_ONLY ZA_REGNUM)
+          (reg:DI SME_STATE_REGNUM)
+          (match_operand:DI 0 "const_int_operand")
+          (match_operand:<SVE_FULL_BHI:VDOUBLE> 1 "aligned_register_operand" 
"Uw2")
+          (match_operand:SVE_FULL_BHI 2 "register_operand" "w")
+          (match_operand:VNx16QI 3 "register_operand" "Uwo")
+          (match_operand:DI 4 "const_int_operand")
+         ]
+         SME_TMOP_INT))]
+   "TARGET_STREAMING_SME_TMOP"
+   "<optab>\tza%0.s, %1, %2.<SVE_FULL_BHI:Vetype>, %3[%4]"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP] Dot product
 ;; -------------------------------------------------------------------------
@@ -2719,6 +2752,75 @@ (define_insn 
"@aarch64_sme_<optab><SME_ZA_F8F16_32:mode><VNx16QI_ONLY:mode>"
   "<optab>\tza%0.<SME_ZA_F8F16_32:Vetype>, %1/m, %2/m, %3.b, %4.b"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [FP] Sparse outer product
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BFTMOPA (SME_TMOP)
+;; - FTMOPA (SME_TMOP)
+;; -------------------------------------------------------------------------
+;; svtmopa_lane_za16[_bf16_bf16]
+;; svtmopa_lane_za16[_f16_f16]
+(define_insn "@aarch64_sme_lane_<optab><SVE_FULL_H:mode><SVE_FULL_HF:mode>"
+  [(set (reg:SVE_FULL_H ZA_REGNUM)
+       (unspec:SVE_FULL_H
+         [(reg:SVE_FULL_H ZA_REGNUM)
+          (reg:DI SME_STATE_REGNUM)
+          (match_operand:DI 0 "const_int_operand")
+          (match_operand:<SVE_FULL_HF:VDOUBLE> 1 "aligned_register_operand" 
"Uw2")
+          (match_operand:SVE_FULL_HF 2 "register_operand" "w")
+          (match_operand:VNx16QI 3 "register_operand" "Uwo")
+          (match_operand:DI 4 "const_int_operand")
+         ]
+         SME_TMOP_FP))]
+  "TARGET_STREAMING_SME_TMOP && (
+    <SVE_FULL_HF:MODE>mode == VNx8HFmode
+    ? TARGET_STREAMING_SME_F16F16
+    : TARGET_STREAMING_SME_B16B16)"
+   "<SVE_FULL_HF:b><optab>\tza%0.h, %1, %2.h, %3[%4]"
+)
+
+;; svtmopa_lane_za32[_bf16_bf16]
+;; svtmopa_lane_za32[_f16_f16]
+;; svtmopa_lane_za32[_f32_f32]
+(define_insn "@aarch64_sme_lane_<optab><SVE_FULL_S:mode><SVE_FULL_BHSF:mode>"
+  [(set (reg:SVE_FULL_S ZA_REGNUM)
+       (unspec:SVE_FULL_S
+         [(reg:SVE_FULL_S ZA_REGNUM)
+          (reg:DI SME_STATE_REGNUM)
+          (match_operand:DI 0 "const_int_operand")
+          (match_operand:<SVE_FULL_BHSF:VDOUBLE> 1 "aligned_register_operand" 
"Uw2")
+          (match_operand:SVE_FULL_BHSF 2 "register_operand" "w")
+          (match_operand:VNx16QI 3 "register_operand" "Uwo")
+          (match_operand:DI 4 "const_int_operand")
+         ]
+         SME_TMOP_FP))]
+   "TARGET_STREAMING_SME_TMOP"
+   "<SVE_FULL_BHSF:b><optab>\tza%0.s, %1, %2.<SVE_FULL_BHSF:Vetype>, %3[%4]"
+)
+
+;; svtmopa_lane_za16[_mf8_mf8]_fpm
+;; svtmopa_lane_za32[_mf8_mf8]_fpm
+(define_insn 
"@aarch64_sme_lane_<optab><SME_ZA_TMOP_FP8:mode><SME_ZA_FP8_x1:mode>"
+  [(set (reg:SME_ZA_TMOP_FP8 ZA_REGNUM)
+       (unspec:SME_ZA_TMOP_FP8
+         [(reg:SME_ZA_TMOP_FP8 ZA_REGNUM)
+          (reg:DI SME_STATE_REGNUM)
+          (match_operand:DI 0 "const_int_operand")
+          (match_operand:<SME_ZA_FP8_x1:VDOUBLE> 1 "aligned_register_operand" 
"Uw2")
+          (match_operand:SME_ZA_FP8_x1 2 "register_operand" "w")
+          (match_operand:VNx16QI 3 "register_operand" "Uwo")
+          (match_operand:DI 4 "const_int_operand")
+          (reg:DI FPM_REGNUM)
+         ]
+         SME_TMOP_FP8))]
+  "TARGET_STREAMING_SME_TMOP && (
+    <SME_ZA_TMOP_FP8:MODE>mode == VNx8HImode
+    ? TARGET_STREAMING_SME_F8F16
+    : TARGET_STREAMING_SME_F8F32)"
+   "<optab>\tza%0.<SME_ZA_TMOP_FP8:Vetype>, %1, %2.<SME_ZA_FP8_x1:Vetype>, 
%3[%4]"
+)
+
 ;; =========================================================================
 ;; == Table lookup
 ;; =========================================================================
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 7d5376124e5..69d179b8ee9 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -4804,6 +4804,45 @@ struct ternary_uintq_intq_opt_n_def
 };
 SHAPE (ternary_uintq_intq_opt_n)
 
+/* void svfoo_t0[_t1_t2](uint64_t, sv<t1>x2_t, sv<t2>_t, svuint8_t, uint64_t)
+   where the first argument is a ZA tile.
+   and the fifth argument is a control index (0-3)  */
+struct ternary_za_uint_dual_single_def : public overloaded_base<1>
+{
+  void
+  build (function_builder &b, const function_group_info &group) const override
+  {
+    b.add_overloaded_functions (group, MODE_none);
+    build_all (b, "_,su64,u1,v2,vu8,su64", group, MODE_none);
+  }
+
+  bool
+  check (function_checker &c) const override
+  {
+    return (c.require_immediate_range (0, 0, c.num_za_tiles () - 1)
+           && c.require_immediate_range (4, 0, 3));
+  }
+
+  tree
+  resolve (function_resolver &r) const override
+  {
+    sve_type type1;
+    type_suffix_index type2;
+    if (!r.check_num_arguments (r.fpm_mode == FPM_set ? 6: 5)
+       || !r.require_integer_immediate (0)
+       || (type1 = r.infer_vector_or_tuple_type (1, 2)) == NUM_TYPE_SUFFIXES
+       || (type2 = r.infer_vector_type (2)) == NUM_TYPE_SUFFIXES
+       || !r.require_vector_type (3, VECTOR_TYPE_svuint8_t)
+       || !r.require_integer_immediate (4)
+       || (r.fpm_mode == FPM_set && !r.require_scalar_type (5, "uint64_t")))
+      return error_mark_node;
+
+    return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0],
+                        type1.type, type2);
+  }
+};
+SHAPE (ternary_za_uint_dual_single)
+
 /* svbool_t svfoo[_<t0>](sv<t0>_t, sv<t0>_t, uint64_t)
 
    where the final argument is an integer constant expression in the
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h 
b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
index b2c927542a8..14afae02ab5 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
@@ -237,6 +237,7 @@ namespace aarch64_sve
     extern const function_shape *const ternary_uintq_intq;
     extern const function_shape *const ternary_uintq_intq_lane;
     extern const function_shape *const ternary_uintq_intq_opt_n;
+    extern const function_shape *const ternary_za_uint_dual_single;
     extern const function_shape *const tmad;
     extern const function_shape *const unary;
     extern const function_shape *const unary_convert;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc 
b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
index 1b809492da4..d79ccdcf705 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.cc
@@ -461,6 +461,39 @@ public:
   }
 };
 
+class svtmopa_lane_za_impl: public read_write_za<function_base>
+{
+public:
+  int
+  unspec_for (const function_instance &instance) const
+  {
+    if (instance.fpm_mode == FPM_set)
+      return UNSPEC_SME_FTMOPA_FP8;
+    auto &suffix1 = instance.type_suffix (1);
+    if (!suffix1.integer_p)
+      return UNSPEC_SME_FTMOPA;
+    auto &suffix2 = instance.type_suffix (2);
+    if (suffix1.unsigned_p && suffix2.unsigned_p)
+      return UNSPEC_SME_UTMOPA;
+    else if (!suffix1.unsigned_p && !suffix2.unsigned_p)
+      return UNSPEC_SME_STMOPA;
+    else if (suffix1.unsigned_p && !suffix2.unsigned_p)
+      return UNSPEC_SME_USTMOPA;
+    else
+      return UNSPEC_SME_SUTMOPA;
+  }
+
+  rtx
+  expand (function_expander &e) const override
+  {
+    insn_code icode;
+    machine_mode za_mode = e.vector_mode (0);
+    machine_mode v_mode = e.tuple_mode (2);
+    icode = code_for_aarch64_sme_lane (unspec_for (e), za_mode, v_mode);
+    return e.use_exact_insn (icode);
+  }
+};
+
 class svundef_za_impl : public write_za<function_base>
 {
 public:
@@ -676,6 +709,7 @@ FUNCTION (svsuvdot_lane_za, sme_2mode_lane_function, 
(UNSPEC_SME_SUVDOT,
                                                      -1, -1))
 FUNCTION (svsumopa_za, sme_2mode_function, (UNSPEC_SME_SUMOPA, -1, -1))
 FUNCTION (svsumops_za, sme_2mode_function, (UNSPEC_SME_SUMOPS, -1, -1))
+FUNCTION (svtmopa_lane_za, svtmopa_lane_za_impl,)
 FUNCTION (svundef_za, svundef_za_impl, )
 FUNCTION (svusdot_za, sme_2mode_function, (-1, UNSPEC_SME_USDOT, -1))
 FUNCTION (svusdot_lane_za, sme_2mode_lane_function, (-1, UNSPEC_SME_USDOT, -1))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def 
b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
index 1c909834835..5decb0167c5 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
@@ -307,6 +307,30 @@ DEF_SME_ZA_FUNCTION_GS_FPM (svmla, 
binary_za_slice_opt_single, za_s_mf8, vg1x24,
 DEF_SME_ZA_FUNCTION_GS_FPM (svmopa, binary_za_m, za_s_mf8, none, za_m, set)
 #undef REQUIRED_EXTENSIONS
 
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_TMOP)
+DEF_SME_ZA_FUNCTION_GS (svtmopa_lane, ternary_za_uint_dual_single, tmop_base, 
none, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_TMOP \
+                                           | AARCH64_FL_SME_F16F16)
+DEF_SME_ZA_FUNCTION_GS (svtmopa_lane, ternary_za_uint_dual_single, 
tmop_h_float, none, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_TMOP \
+                                           | AARCH64_FL_SME_B16B16)
+DEF_SME_ZA_FUNCTION_GS (svtmopa_lane, ternary_za_uint_dual_single, 
tmop_h_bfloat, none, none)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_TMOP \
+                                           | AARCH64_FL_SME_F8F16)
+DEF_SME_ZA_FUNCTION_GS_FPM (svtmopa_lane, ternary_za_uint_dual_single, 
tmop_h_mf8, none, none, set)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_TMOP \
+                                           | AARCH64_FL_SME_F8F32)
+DEF_SME_ZA_FUNCTION_GS_FPM (svtmopa_lane, ternary_za_uint_dual_single, 
tmop_s_mf8, none, none, set)
+#undef REQUIRED_EXTENSIONS
+
 #undef DEF_SME_ZA_FUNCTION
 #undef DEF_SME_ZA_FUNCTION_GS
 #undef DEF_SME_ZA_FUNCTION_GS_FPM
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.h 
b/gcc/config/aarch64/aarch64-sve-builtins-sme.h
index 26ccec14d72..4ee674b6108 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.h
@@ -70,6 +70,7 @@ namespace aarch64_sve
     extern const function_base *const svsuvdot_lane_za;
     extern const function_base *const svsumopa_za;
     extern const function_base *const svsumops_za;
+    extern const function_base *const svtmopa_lane_za;
     extern const function_base *const svusdot_za;
     extern const function_base *const svusdot_lane_za;
     extern const function_base *const svusvdot_lane_za;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
b/gcc/config/aarch64/aarch64-sve-builtins.cc
index d25e5437ba8..27474b2972c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -756,6 +756,30 @@ CONSTEXPR const group_suffix_info group_suffixes[] = {
 #define TYPES_mop_i16i64_unsigned(S, D, T) \
   D (za64, u16)
 
+/* _za32 x { _s8_s8 _u8_u8
+            _s8_u8 _u8_s8
+            _s16_s16 _u16_u16
+            _bf16_bf16 _f16_f16
+            _f32_f32 }.  */
+#define TYPES_tmop_base(S, D, T) \
+  T (za32, s8, s8), T (za32, u8, u8), \
+  T (za32, s8, u8), T (za32, u8, s8), \
+  T (za32, s16, s16), T (za32, u16, u16), \
+  T (za32, bf16, bf16), T (za32, f16, f16), \
+  T (za32, f32, f32)
+
+#define TYPES_tmop_h_float(S, D, T) \
+  T (za16, f16, f16)
+
+#define TYPES_tmop_h_bfloat(S, D, T) \
+  T (za16, bf16, bf16)
+
+#define TYPES_tmop_h_mf8(S, D, T) \
+  T (za16, mf8, mf8)
+
+#define TYPES_tmop_s_mf8(S, D, T) \
+  T (za32, mf8, mf8)
+
 /* _za.  */
 #define TYPES_za(S, D, T) \
   S (za)
@@ -890,6 +914,12 @@ DEF_SVE_TYPES_ARRAY (mop_base_unsigned);
 DEF_SVE_TYPES_ARRAY (mop_i16i64);
 DEF_SVE_TYPES_ARRAY (mop_i16i64_signed);
 DEF_SVE_TYPES_ARRAY (mop_i16i64_unsigned);
+DEF_SVE_TYPES_ARRAY (tmop_base);
+DEF_SVE_TYPES_ARRAY (tmop_h_float);
+DEF_SVE_TYPES_ARRAY (tmop_h_bfloat);
+DEF_SVE_TYPES_ARRAY (tmop_h_mf8);
+DEF_SVE_TYPES_ARRAY (tmop_s_mf8);
+
 DEF_SVE_TYPES_ARRAY (za);
 
 static const group_suffix_index groups_none[] = {
@@ -1863,7 +1893,7 @@ function_resolver::lookup_form (mode_suffix_index mode, 
sve_type type)
 }
 
 /* Resolve the function to one with the mode suffix given by MODE, the
-   type suffixes given by TYPE0 and TYPE1, and group suffix given by
+   type suffixes given by TYPE0, TYPE1 and TYPE2, and group suffix given by
    GROUP.  Return its function decl on success, otherwise report an
    error and return error_mark_node.  */
 tree
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 1dd942f377f..9508c017f60 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -348,6 +348,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 
 #define TARGET_STREAMING_SME2p1 (TARGET_STREAMING && AARCH64_HAVE_ISA (SME2p1))
 
+#define TARGET_STREAMING_SME_TMOP \
+  (AARCH64_HAVE_ISA (SME_TMOP) && TARGET_STREAMING)
+
 #define TARGET_SME_B16B16 AARCH64_HAVE_ISA (SME_B16B16)
 
 /* ARMv8.3-A features.  */
diff --git a/gcc/config/aarch64/constraints.md 
b/gcc/config/aarch64/constraints.md
index 3d166fe3a17..7e0f7670c24 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -64,6 +64,10 @@ (define_register_constraint "Uwt" "FP_REGS"
   "@internal The first register in a tuple of 4 strided FPRs."
   "(regno & 0xc) == 0")
 
+(define_register_constraint "Uwo" "FP_REGS"
+  "@internal Control Vector Register (One of Z20-Z23 or Z28-Z31)."
+  "(regno & 0x14) == 0x14")
+
 (define_register_constraint "Upa" "PR_REGS"
   "SVE predicate registers p0 - p15.")
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b425b0ed2ca..cf327056449 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -527,6 +527,9 @@ (define_mode_iterator SVE_FULL_BHSI [VNx16QI VNx8HI VNx4SI])
 ;; Pairs of the above.
 (define_mode_iterator SVE_FULL_BHSIx2 [VNx32QI VNx16HI VNx8SI])
 
+;; Fully-packed SVE vector modes that have 16-bit elements.
+(define_mode_iterator SVE_FULL_H [VNx8HI VNx8BF VNx8HF])
+
 ;; Fully-packed SVE vector modes that have 16-bit float elements.
 (define_mode_iterator SVE_FULL_HF [VNx8BF VNx8HF])
 
@@ -553,6 +556,10 @@ (define_mode_iterator SVE_FULL_HSI [VNx8HI VNx4SI])
 ;; elements.
 (define_mode_iterator SVE_FULL_HSF [VNx8HF VNx4SF])
 
+;; Fully-packed SVE floating-point vector modes that have 16-bit or 32-bit
+;; elements, including brain float.
+(define_mode_iterator SVE_FULL_BHSF [VNx8BF VNx8HF VNx4SF])
+
 ;; Like SVE_FULL_HSF, but selectively enables those modes that are valid
 ;; for the variant of the SVE2 FP8 FDOT instruction associated with that
 ;; mode.
@@ -803,6 +810,8 @@ (define_mode_iterator SME_MOP_HSDF [VNx4SF
                                    (VNx8HF "TARGET_STREAMING_SME_F16F16")
                                    (VNx8BF "TARGET_STREAMING_SME_B16B16")])
 
+(define_mode_iterator SME_ZA_TMOP_FP8 [VNx8HI VNx4SI])
+
 ;; ------------------------------------------------------------------
 ;; Unspec enumerations for Advance SIMD. These could well go into
 ;; aarch64.md but for their use in int_iterators here.
@@ -1295,6 +1304,8 @@ (define_c_enum "unspec"
     UNSPEC_SME_FMOPA
     UNSPEC_SME_FMOPS
     UNSPEC_SME_FSUB
+    UNSPEC_SME_FTMOPA
+    UNSPEC_SME_FTMOPA_FP8
     UNSPEC_SME_LD1_HOR
     UNSPEC_SME_LD1_VER
     UNSPEC_SME_READ
@@ -1311,12 +1322,14 @@ (define_c_enum "unspec"
     UNSPEC_SME_SMOPS
     UNSPEC_SME_ST1_HOR
     UNSPEC_SME_ST1_VER
+    UNSPEC_SME_STMOPA
     UNSPEC_SME_SUB
     UNSPEC_SME_SUB_WRITE
     UNSPEC_SME_SUDOT
     UNSPEC_SME_SUVDOT
     UNSPEC_SME_SUMOPA
     UNSPEC_SME_SUMOPS
+    UNSPEC_SME_SUTMOPA
     UNSPEC_SME_UDOT
     UNSPEC_SME_UVDOT
     UNSPEC_SME_UMLA
@@ -1327,6 +1340,8 @@ (define_c_enum "unspec"
     UNSPEC_SME_USVDOT
     UNSPEC_SME_USMOPA
     UNSPEC_SME_USMOPS
+    UNSPEC_SME_USTMOPA
+    UNSPEC_SME_UTMOPA
     UNSPEC_SME_WRITE
     UNSPEC_SME_WRITE_HOR
     UNSPEC_SME_WRITE_VER
@@ -4092,6 +4107,11 @@ (define_int_iterator SME_FP8_FVDOT_HALF [
        UNSPEC_SME_FVDOTT_FP8
 ])
 
+(define_int_iterator SME_TMOP_INT [UNSPEC_SME_STMOPA UNSPEC_SME_UTMOPA
+                                  UNSPEC_SME_SUTMOPA UNSPEC_SME_USTMOPA])
+(define_int_iterator SME_TMOP_FP [UNSPEC_SME_FTMOPA])
+(define_int_iterator SME_TMOP_FP8 [UNSPEC_SME_FTMOPA_FP8])
+
 ;; Iterators for atomic operations.
 
 (define_int_iterator ATOMIC_LDOP
@@ -4251,6 +4271,8 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
                        (UNSPEC_SME_FMOPA "fmopa")
                        (UNSPEC_SME_FMOPS "fmops")
                        (UNSPEC_SME_FSUB "fsub")
+                       (UNSPEC_SME_FTMOPA "ftmopa")
+                       (UNSPEC_SME_FTMOPA_FP8 "ftmopa")
                        (UNSPEC_SME_LD1_HOR "ld1_hor")
                        (UNSPEC_SME_LD1_VER "ld1_ver")
                        (UNSPEC_SME_READ_HOR "read_hor")
@@ -4265,6 +4287,8 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
                        (UNSPEC_SME_SMOPS "smops")
                        (UNSPEC_SME_ST1_HOR "st1_hor")
                        (UNSPEC_SME_ST1_VER "st1_ver")
+                       (UNSPEC_SME_STMOPA "stmopa")
+                       (UNSPEC_SME_SUTMOPA "sutmopa")
                        (UNSPEC_SME_SUB "sub")
                        (UNSPEC_SME_SUB_WRITE "sub_write")
                        (UNSPEC_SME_SUDOT "sudot")
@@ -4278,9 +4302,11 @@ (define_int_attr optab [(UNSPEC_ANDF "and")
                        (UNSPEC_SME_UMOPA "umopa")
                        (UNSPEC_SME_UMOPS "umops")
                        (UNSPEC_SME_USDOT "usdot")
+                       (UNSPEC_SME_USTMOPA "ustmopa")
                        (UNSPEC_SME_USVDOT "usvdot")
                        (UNSPEC_SME_USMOPA "usmopa")
                        (UNSPEC_SME_USMOPS "usmops")
+                       (UNSPEC_SME_UTMOPA "utmopa")
                        (UNSPEC_SME_WRITE_HOR "write_hor")
                        (UNSPEC_SME_WRITE_VER "write_ver")
                        (UNSPEC_SQCADD90 "sqcadd90")
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c 
b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
index 284c2a23252..3556ff6c32f 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -364,3 +364,19 @@
 #ifndef __ARM_FEATURE_FAMINMAX
 #error Foo
 #endif
+
+#pragma GCC target "arch=armv9.5-a+sme"
+#ifdef __ARM_FEATURE_SME_TMOP
+#error Foo
+#endif
+
+#pragma GCC target "arch=armv9-a+sme-tmop"
+#ifndef __ARM_FEATURE_SME_TMOP
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_SME
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_SME2
+#error Foo
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h
index ff237983ad9..55b217ae0fe 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/test_sme2_acle.h
@@ -121,4 +121,26 @@
     INVOKE (CODE1, CODE2);                                     \
   }
 
+#define TEST_ZA_TMOP(NAME, TYPE1, TYPE2, CODE1, CODE2)         \
+  PROTO (NAME, void, (fpm_t fpm0))                             \
+  {                                                            \
+    register TYPE1 z0 __asm ("z0");                            \
+    register TYPE1 z1 __asm ("z1");                            \
+    register TYPE1 z2 __asm ("z2");                            \
+    register TYPE2 z3 __asm ("z3");                            \
+    register TYPE1 z16 __asm ("z16");                          \
+    register TYPE2 z17 __asm ("z17");                          \
+    register svuint8_t z19 __asm ("z19");                      \
+    register svuint8_t z20 __asm ("z20");                      \
+    register svuint8_t z23 __asm ("z23");                      \
+    register svuint8_t z24 __asm ("z24");                      \
+    register svuint8_t z27 __asm ("z27");                      \
+    register svuint8_t z28 __asm ("z28");                      \
+    __asm volatile ("" : "=w" (z0), "=w" (z1), "=w" (z2),      \
+                   "=w" (z3), "=w" (z16), "=w" (z17),          \
+                   "=w" (z19), "=w" (z20), "=w" (z23),         \
+                   "=w" (z24), "=w" (z27), "=w" (z28));        \
+    INVOKE (CODE1, CODE2);                                     \
+  }
+
 #endif
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_bf16_bf16.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_bf16_bf16.c
new file mode 100644
index 00000000000..84a9b64c41a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_bf16_bf16.c
@@ -0,0 +1,76 @@
+/* { dg-do assemble { target { aarch64_asm_sme-b16b16_ok && 
aarch64_asm_sme-tmop_ok}  } } */
+/* { dg-do compile { target { ! { aarch64_asm_sme-b16b16_ok && 
aarch64_asm_sme-tmop_ok } } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop+sme-b16b16"
+
+/*
+** tmopa_lane_za16_bf16_bf16_0_z0_z3_z20_0:
+**     bftmopa za0\.h, {z0\.h - z1\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_bf16_bf16_0_z0_z3_z20_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za16_bf16_bf16 (0, z0, z3, z20, 0),
+                svtmopa_lane_za16 (0, z0, z3, z20, 0))
+
+/* ZA slice and offset with different values.
+** tmopa_lane_za16_bf16_bf16_1_z2_z3_z20_3:
+**     bftmopa za1\.h, {z2\.h - z3\.h}, z3\.h, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_bf16_bf16_1_z2_z3_z20_3, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za16_bf16_bf16 (1, z2, z3, z20, 3),
+                svtmopa_lane_za16 (1, z2, z3, z20, 3))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za16_bf16_bf16_0_z1_z3_z20_0:
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     bftmopa za0\.h, {\1\.h - \2\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_bf16_bf16_0_z1_z3_z20_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za16_bf16_bf16 (0, z1, z3, z20, 0),
+                svtmopa_lane_za16 (0, z1, z3, z20, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_bf16_bf16_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     bftmopa za0\.h, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_bf16_bf16_0_z0_z3_z19_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za16_bf16_bf16 (0, z0, z3, z19, 0),
+                svtmopa_lane_za16 (0, z0, z3, z19, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_bf16_bf16_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     bftmopa za0\.h, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_bf16_bf16_0_z0_z3_z24_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za16_bf16_bf16 (0, z0, z3, z24, 0),
+                svtmopa_lane_za16 (0, z0, z3, z24, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_bf16_bf16_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     bftmopa za0\.h, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_bf16_bf16_0_z0_z3_z27_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za16_bf16_bf16 (0, z0, z3, z27, 0),
+                svtmopa_lane_za16 (0, z0, z3, z27, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_bf16_bf16_0_z0_z3_z28_0:
+**     bftmopa za0\.h, {z0\.h - z1\.h}, z3\.h, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_bf16_bf16_0_z0_z3_z28_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za16_bf16_bf16 (0, z0, z3, z28, 0),
+                svtmopa_lane_za16 (0, z0, z3, z28, 0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_f16_f16.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_f16_f16.c
new file mode 100644
index 00000000000..7b0aeefe45e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_f16_f16.c
@@ -0,0 +1,76 @@
+/* { dg-do assemble { target { aarch64_asm_sme-f16f16_ok && 
aarch64_asm_sme-tmop_ok}  } } */
+/* { dg-do compile { target { ! { aarch64_asm_sme-f16f16_ok && 
aarch64_asm_sme-tmop_ok } } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop+sme-f16f16"
+
+/*
+** tmopa_lane_za16_f16_f16_0_z0_z3_z20_0:
+**     ftmopa  za0\.h, {z0\.h - z1\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_f16_f16_0_z0_z3_z20_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za16_f16_f16 (0, z0, z3, z20, 0),
+                svtmopa_lane_za16 (0, z0, z3, z20, 0))
+
+/* ZA slice and offset with different values.
+** tmopa_lane_za16_f16_f16_1_z2_z3_z20_3:
+**     ftmopa  za1\.h, {z2\.h - z3\.h}, z3\.h, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_f16_f16_1_z2_z3_z20_3, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za16_f16_f16 (1, z2, z3, z20, 3),
+                svtmopa_lane_za16 (1, z2, z3, z20, 3))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za16_f16_f16_0_z1_z3_z20_0:
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     ftmopa  za0\.h, {\1\.h - \2\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_f16_f16_0_z1_z3_z20_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za16_f16_f16 (0, z1, z3, z20, 0),
+                svtmopa_lane_za16 (0, z1, z3, z20, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_f16_f16_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     ftmopa  za0\.h, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_f16_f16_0_z0_z3_z19_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za16_f16_f16 (0, z0, z3, z19, 0),
+                svtmopa_lane_za16 (0, z0, z3, z19, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_f16_f16_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     ftmopa  za0\.h, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_f16_f16_0_z0_z3_z24_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za16_f16_f16 (0, z0, z3, z24, 0),
+                svtmopa_lane_za16 (0, z0, z3, z24, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_f16_f16_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     ftmopa  za0\.h, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_f16_f16_0_z0_z3_z27_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za16_f16_f16 (0, z0, z3, z27, 0),
+                svtmopa_lane_za16 (0, z0, z3, z27, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_f16_f16_0_z0_z3_z28_0:
+**     ftmopa  za0\.h, {z0\.h - z1\.h}, z3\.h, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_f16_f16_0_z0_z3_z28_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za16_f16_f16 (0, z0, z3, z28, 0),
+                svtmopa_lane_za16 (0, z0, z3, z28, 0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_mf8_mf8.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_mf8_mf8.c
new file mode 100644
index 00000000000..b5381ce37b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za16_mf8_mf8.c
@@ -0,0 +1,83 @@
+/* { dg-do assemble { target { aarch64_asm_sme-f16f16_ok && 
aarch64_asm_sme-tmop_ok}  } } */
+/* { dg-do compile { target { ! { aarch64_asm_sme-f16f16_ok && 
aarch64_asm_sme-tmop_ok } } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop+sme-f8f16"
+
+/*
+** tmopa_lane_za16_mf8_mf8_0_z0_z3_z20_0:
+**     msr     fpmr, x0
+**     ftmopa  za0\.h, {z0\.b - z1\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_mf8_mf8_0_z0_z3_z20_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za16_mf8_mf8_fpm (0, z0, z3, z20, 0, fpm0),
+                svtmopa_lane_za16_fpm (0, z0, z3, z20, 0, fpm0))
+
+/* ZA slice and offset with different values.
+** tmopa_lane_za16_mf8_mf8_1_z2_z3_z20_3:
+**     msr     fpmr, x0
+**     ftmopa  za1\.h, {z2\.b - z3\.b}, z3\.b, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_mf8_mf8_1_z2_z3_z20_3, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za16_mf8_mf8_fpm (1, z2, z3, z20, 3, fpm0),
+                svtmopa_lane_za16_fpm (1, z2, z3, z20, 3, fpm0))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za16_mf8_mf8_0_z1_z3_z20_0:
+**     msr     fpmr, x0
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     ftmopa  za0\.h, {\1\.b - \2\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_mf8_mf8_0_z1_z3_z20_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za16_mf8_mf8_fpm (0, z1, z3, z20, 0, fpm0),
+                svtmopa_lane_za16_fpm (0, z1, z3, z20, 0, fpm0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_mf8_mf8_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     msr     fpmr, x0
+**     ftmopa  za0\.h, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_mf8_mf8_0_z0_z3_z19_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za16_mf8_mf8_fpm (0, z0, z3, z19, 0, fpm0),
+                svtmopa_lane_za16_fpm (0, z0, z3, z19, 0, fpm0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_mf8_mf8_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     msr     fpmr, x0
+**     ftmopa  za0\.h, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_mf8_mf8_0_z0_z3_z24_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za16_mf8_mf8_fpm (0, z0, z3, z24, 0, fpm0),
+                svtmopa_lane_za16_fpm (0, z0, z3, z24, 0, fpm0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_mf8_mf8_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     msr     fpmr, x0
+**     ftmopa  za0\.h, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_mf8_mf8_0_z0_z3_z27_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za16_mf8_mf8_fpm (0, z0, z3, z27, 0, fpm0),
+                svtmopa_lane_za16_fpm (0, z0, z3, z27, 0, fpm0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za16_mf8_mf8_0_z0_z3_z28_0:
+**     msr     fpmr, x0
+**     ftmopa  za0\.h, {z0\.b - z1\.b}, z3\.b, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za16_mf8_mf8_0_z0_z3_z28_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za16_mf8_mf8_fpm (0, z0, z3, z28, 0, fpm0),
+                svtmopa_lane_za16_fpm (0, z0, z3, z28, 0, fpm0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_bf16_bf16.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_bf16_bf16.c
new file mode 100644
index 00000000000..854961df988
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_bf16_bf16.c
@@ -0,0 +1,76 @@
+/* { dg-do assemble { target aarch64_asm_sme-tmop_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-tmop_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop"
+
+/*
+** tmopa_lane_za32_bf16_bf16_0_z0_z3_z20_0:
+**     bftmopa za0\.s, {z0\.h - z1\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_bf16_bf16_0_z0_z3_z20_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za32_bf16_bf16 (0, z0, z3, z20, 0),
+                svtmopa_lane_za32 (0, z0, z3, z20, 0))
+
+/* ZA slice and offset with maximum values.
+** tmopa_lane_za32_bf16_bf16_3_z2_z3_z20_3:
+**     bftmopa za3\.s, {z2\.h - z3\.h}, z3\.h, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_bf16_bf16_3_z2_z3_z20_3, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za32_bf16_bf16 (3, z2, z3, z20, 3),
+                svtmopa_lane_za32 (3, z2, z3, z20, 3))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za32_bf16_bf16_0_z1_z3_z20_0:
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     bftmopa za0\.s, {\1\.h - \2\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_bf16_bf16_0_z1_z3_z20_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za32_bf16_bf16 (0, z1, z3, z20, 0),
+                svtmopa_lane_za32 (0, z1, z3, z20, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_bf16_bf16_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     bftmopa za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_bf16_bf16_0_z0_z3_z19_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za32_bf16_bf16 (0, z0, z3, z19, 0),
+                svtmopa_lane_za32 (0, z0, z3, z19, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_bf16_bf16_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     bftmopa za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_bf16_bf16_0_z0_z3_z24_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za32_bf16_bf16 (0, z0, z3, z24, 0),
+                svtmopa_lane_za32 (0, z0, z3, z24, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_bf16_bf16_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     bftmopa za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_bf16_bf16_0_z0_z3_z27_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za32_bf16_bf16 (0, z0, z3, z27, 0),
+                svtmopa_lane_za32 (0, z0, z3, z27, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_bf16_bf16_0_z0_z3_z28_0:
+**     bftmopa za0\.s, {z0\.h - z1\.h}, z3\.h, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_bf16_bf16_0_z0_z3_z28_0, svbfloat16x2_t, 
svbfloat16_t,
+                svtmopa_lane_za32_bf16_bf16 (0, z0, z3, z28, 0),
+                svtmopa_lane_za32 (0, z0, z3, z28, 0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_f16_f16.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_f16_f16.c
new file mode 100644
index 00000000000..dad5daac214
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_f16_f16.c
@@ -0,0 +1,76 @@
+/* { dg-do assemble { target aarch64_asm_sme-tmop_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-tmop_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop"
+
+/*
+** tmopa_lane_za32_f16_f16_0_z0_z3_z20_0:
+**     ftmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f16_f16_0_z0_z3_z20_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za32_f16_f16 (0, z0, z3, z20, 0),
+                svtmopa_lane_za32 (0, z0, z3, z20, 0))
+
+/* ZA slice and offset with maximum values.
+** tmopa_lane_za32_f16_f16_3_z2_z3_z20_3:
+**     ftmopa  za3\.s, {z2\.h - z3\.h}, z3\.h, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f16_f16_3_z2_z3_z20_3, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za32_f16_f16 (3, z2, z3, z20, 3),
+                svtmopa_lane_za32 (3, z2, z3, z20, 3))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za32_f16_f16_0_z1_z3_z20_0:
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     ftmopa  za0\.s, {\1\.h - \2\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f16_f16_0_z1_z3_z20_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za32_f16_f16 (0, z1, z3, z20, 0),
+                svtmopa_lane_za32 (0, z1, z3, z20, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_f16_f16_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     ftmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f16_f16_0_z0_z3_z19_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za32_f16_f16 (0, z0, z3, z19, 0),
+                svtmopa_lane_za32 (0, z0, z3, z19, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_f16_f16_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     ftmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f16_f16_0_z0_z3_z24_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za32_f16_f16 (0, z0, z3, z24, 0),
+                svtmopa_lane_za32 (0, z0, z3, z24, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_f16_f16_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     ftmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f16_f16_0_z0_z3_z27_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za32_f16_f16 (0, z0, z3, z27, 0),
+                svtmopa_lane_za32 (0, z0, z3, z27, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_f16_f16_0_z0_z3_z28_0:
+**     ftmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f16_f16_0_z0_z3_z28_0, svfloat16x2_t, 
svfloat16_t,
+                svtmopa_lane_za32_f16_f16 (0, z0, z3, z28, 0),
+                svtmopa_lane_za32 (0, z0, z3, z28, 0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_f32_f32.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_f32_f32.c
new file mode 100644
index 00000000000..c61d2f08ed5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_f32_f32.c
@@ -0,0 +1,76 @@
+/* { dg-do assemble { target aarch64_asm_sme-tmop_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-tmop_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop"
+
+/*
+** tmopa_lane_za32_f32_f32_0_z0_z3_z20_0:
+**     ftmopa  za0\.s, {z0\.s - z1\.s}, z3\.s, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f32_f32_0_z0_z3_z20_0, svfloat32x2_t, 
svfloat32_t,
+                svtmopa_lane_za32_f32_f32 (0, z0, z3, z20, 0),
+                svtmopa_lane_za32 (0, z0, z3, z20, 0))
+
+/* ZA slice and offset with maximum values.
+** tmopa_lane_za32_f32_f32_3_z2_z3_z20_3:
+**     ftmopa  za3\.s, {z2\.s - z3\.s}, z3\.s, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f32_f32_3_z2_z3_z20_3, svfloat32x2_t, 
svfloat32_t,
+                svtmopa_lane_za32_f32_f32 (3, z2, z3, z20, 3),
+                svtmopa_lane_za32 (3, z2, z3, z20, 3))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za32_f32_f32_0_z1_z3_z20_0:
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     ftmopa  za0\.s, {\1\.s - \2\.s}, z3\.s, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f32_f32_0_z1_z3_z20_0, svfloat32x2_t, 
svfloat32_t,
+                svtmopa_lane_za32_f32_f32 (0, z1, z3, z20, 0),
+                svtmopa_lane_za32 (0, z1, z3, z20, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_f32_f32_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     ftmopa  za0\.s, {z0\.s - z1\.s}, z3\.s, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f32_f32_0_z0_z3_z19_0, svfloat32x2_t, 
svfloat32_t,
+                svtmopa_lane_za32_f32_f32 (0, z0, z3, z19, 0),
+                svtmopa_lane_za32 (0, z0, z3, z19, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_f32_f32_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     ftmopa  za0\.s, {z0\.s - z1\.s}, z3\.s, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f32_f32_0_z0_z3_z24_0, svfloat32x2_t, 
svfloat32_t,
+                svtmopa_lane_za32_f32_f32 (0, z0, z3, z24, 0),
+                svtmopa_lane_za32 (0, z0, z3, z24, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_f32_f32_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     ftmopa  za0\.s, {z0\.s - z1\.s}, z3\.s, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f32_f32_0_z0_z3_z27_0, svfloat32x2_t, 
svfloat32_t,
+                svtmopa_lane_za32_f32_f32 (0, z0, z3, z27, 0),
+                svtmopa_lane_za32 (0, z0, z3, z27, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_f32_f32_0_z0_z3_z28_0:
+**     ftmopa  za0\.s, {z0\.s - z1\.s}, z3\.s, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_f32_f32_0_z0_z3_z28_0, svfloat32x2_t, 
svfloat32_t,
+                svtmopa_lane_za32_f32_f32 (0, z0, z3, z28, 0),
+                svtmopa_lane_za32 (0, z0, z3, z28, 0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_mf8_mf8.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_mf8_mf8.c
new file mode 100644
index 00000000000..5eca7c6c477
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_mf8_mf8.c
@@ -0,0 +1,83 @@
+/* { dg-do assemble { target aarch64_asm_sme-tmop_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-tmop_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop+sme-f8f32"
+
+/*
+** tmopa_lane_za32_mf8_mf8_0_z0_z3_z20_0:
+**     msr     fpmr, x0
+**     ftmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_mf8_mf8_0_z0_z3_z20_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za32_mf8_mf8_fpm (0, z0, z3, z20, 0, fpm0),
+                svtmopa_lane_za32_fpm (0, z0, z3, z20, 0, fpm0))
+
+/* ZA slice and offset with maximum values.
+** tmopa_lane_za32_mf8_mf8_3_z2_z3_z20_3:
+**     msr     fpmr, x0
+**     ftmopa  za3\.s, {z2\.b - z3\.b}, z3\.b, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_mf8_mf8_3_z2_z3_z20_3, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za32_mf8_mf8_fpm (3, z2, z3, z20, 3, fpm0),
+                svtmopa_lane_za32_fpm (3, z2, z3, z20, 3, fpm0))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za32_mf8_mf8_0_z1_z3_z20_0:
+**     msr     fpmr, x0
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     ftmopa  za0\.s, {\1\.b - \2\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_mf8_mf8_0_z1_z3_z20_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za32_mf8_mf8_fpm (0, z1, z3, z20, 0, fpm0),
+                svtmopa_lane_za32_fpm (0, z1, z3, z20, 0, fpm0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_mf8_mf8_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     msr     fpmr, x0
+**     ftmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_mf8_mf8_0_z0_z3_z19_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za32_mf8_mf8_fpm (0, z0, z3, z19, 0, fpm0),
+                svtmopa_lane_za32_fpm (0, z0, z3, z19, 0, fpm0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_mf8_mf8_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     msr     fpmr, x0
+**     ftmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_mf8_mf8_0_z0_z3_z24_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za32_mf8_mf8_fpm (0, z0, z3, z24, 0, fpm0),
+                svtmopa_lane_za32_fpm (0, z0, z3, z24, 0, fpm0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_mf8_mf8_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     msr     fpmr, x0
+**     ftmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_mf8_mf8_0_z0_z3_z27_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za32_mf8_mf8_fpm (0, z0, z3, z27, 0, fpm0),
+                svtmopa_lane_za32_fpm (0, z0, z3, z27, 0, fpm0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_mf8_mf8_0_z0_z3_z28_0:
+**     msr     fpmr, x0
+**     ftmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_mf8_mf8_0_z0_z3_z28_0, svmfloat8x2_t, 
svmfloat8_t,
+                svtmopa_lane_za32_mf8_mf8_fpm (0, z0, z3, z28, 0, fpm0),
+                svtmopa_lane_za32_fpm (0, z0, z3, z28, 0, fpm0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s16_s16.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s16_s16.c
new file mode 100644
index 00000000000..c8533a76d46
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s16_s16.c
@@ -0,0 +1,76 @@
+/* { dg-do assemble { target aarch64_asm_sme-tmop_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-tmop_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop"
+
+/*
+** tmopa_lane_za32_s16_s16_0_z0_z3_z20_0:
+**     stmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s16_s16_0_z0_z3_z20_0, svint16x2_t, svint16_t,
+                svtmopa_lane_za32_s16_s16 (0, z0, z3, z20, 0),
+                svtmopa_lane_za32 (0, z0, z3, z20, 0))
+
+/* ZA slice and offset with maximum values.
+** tmopa_lane_za32_s16_s16_3_z2_z3_z20_3:
+**     stmopa  za3\.s, {z2\.h - z3\.h}, z3\.h, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s16_s16_3_z2_z3_z20_3, svint16x2_t, svint16_t,
+                svtmopa_lane_za32_s16_s16 (3, z2, z3, z20, 3),
+                svtmopa_lane_za32 (3, z2, z3, z20, 3))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za32_s16_s16_0_z1_z3_z20_0:
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     stmopa  za0\.s, {\1\.h - \2\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s16_s16_0_z1_z3_z20_0, svint16x2_t, svint16_t,
+                svtmopa_lane_za32_s16_s16 (0, z1, z3, z20, 0),
+                svtmopa_lane_za32 (0, z1, z3, z20, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s16_s16_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     stmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s16_s16_0_z0_z3_z19_0, svint16x2_t, svint16_t,
+                svtmopa_lane_za32_s16_s16 (0, z0, z3, z19, 0),
+                svtmopa_lane_za32 (0, z0, z3, z19, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s16_s16_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     stmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s16_s16_0_z0_z3_z24_0, svint16x2_t, svint16_t,
+                svtmopa_lane_za32_s16_s16 (0, z0, z3, z24, 0),
+                svtmopa_lane_za32 (0, z0, z3, z24, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s16_s16_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     stmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s16_s16_0_z0_z3_z27_0, svint16x2_t, svint16_t,
+                svtmopa_lane_za32_s16_s16 (0, z0, z3, z27, 0),
+                svtmopa_lane_za32 (0, z0, z3, z27, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s16_s16_0_z0_z3_z28_0:
+**     stmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s16_s16_0_z0_z3_z28_0, svint16x2_t, svint16_t,
+                svtmopa_lane_za32_s16_s16 (0, z0, z3, z28, 0),
+                svtmopa_lane_za32 (0, z0, z3, z28, 0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s8_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s8_s8.c
new file mode 100644
index 00000000000..65a4e976797
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s8_s8.c
@@ -0,0 +1,76 @@
+/* { dg-do assemble { target aarch64_asm_sme-tmop_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-tmop_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop"
+
+/*
+** tmopa_lane_za32_s8_s8_0_z0_z3_z20_0:
+**     stmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_s8_0_z0_z3_z20_0, svint8x2_t, svint8_t,
+                svtmopa_lane_za32_s8_s8 (0, z0, z3, z20, 0),
+                svtmopa_lane_za32 (0, z0, z3, z20, 0))
+
+/* ZA slice and offset with maximum values.
+** tmopa_lane_za32_s8_s8_3_z2_z3_z20_3:
+**     stmopa  za3\.s, {z2\.b - z3\.b}, z3\.b, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_s8_3_z2_z3_z20_3, svint8x2_t, svint8_t,
+                svtmopa_lane_za32_s8_s8 (3, z2, z3, z20, 3),
+                svtmopa_lane_za32 (3, z2, z3, z20, 3))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za32_s8_s8_0_z1_z3_z20_0:
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     stmopa  za0\.s, {\1\.b - \2\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_s8_0_z1_z3_z20_0, svint8x2_t, svint8_t,
+                svtmopa_lane_za32_s8_s8 (0, z1, z3, z20, 0),
+                svtmopa_lane_za32 (0, z1, z3, z20, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s8_s8_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     stmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_s8_0_z0_z3_z19_0, svint8x2_t, svint8_t,
+                svtmopa_lane_za32_s8_s8 (0, z0, z3, z19, 0),
+                svtmopa_lane_za32 (0, z0, z3, z19, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s8_s8_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     stmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_s8_0_z0_z3_z24_0, svint8x2_t, svint8_t,
+                svtmopa_lane_za32_s8_s8 (0, z0, z3, z24, 0),
+                svtmopa_lane_za32 (0, z0, z3, z24, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s8_s8_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     stmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_s8_0_z0_z3_z27_0, svint8x2_t, svint8_t,
+                svtmopa_lane_za32_s8_s8 (0, z0, z3, z27, 0),
+                svtmopa_lane_za32 (0, z0, z3, z27, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s8_s8_0_z0_z3_z28_0:
+**     stmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_s8_0_z0_z3_z28_0, svint8x2_t, svint8_t,
+                svtmopa_lane_za32_s8_s8 (0, z0, z3, z28, 0),
+                svtmopa_lane_za32 (0, z0, z3, z28, 0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s8_u8.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s8_u8.c
new file mode 100644
index 00000000000..8bf14909516
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_s8_u8.c
@@ -0,0 +1,76 @@
+/* { dg-do assemble { target aarch64_asm_sme-tmop_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-tmop_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop"
+
+/*
+** tmopa_lane_za32_s8_u8_0_z0_z3_z20_0:
+**     sutmopa za0\.s, {z0\.b - z1\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_u8_0_z0_z3_z20_0, svint8x2_t, svuint8_t,
+                svtmopa_lane_za32_s8_u8 (0, z0, z3, z20, 0),
+                svtmopa_lane_za32 (0, z0, z3, z20, 0))
+
+/* ZA slice and offset with maximum values.
+** tmopa_lane_za32_s8_u8_3_z2_z3_z20_3:
+**     sutmopa za3\.s, {z2\.b - z3\.b}, z3\.b, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_u8_3_z2_z3_z20_3, svint8x2_t, svuint8_t,
+                svtmopa_lane_za32_s8_u8 (3, z2, z3, z20, 3),
+                svtmopa_lane_za32 (3, z2, z3, z20, 3))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za32_s8_u8_0_z1_z3_z20_0:
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     sutmopa za0\.s, {\1\.b - \2\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_u8_0_z1_z3_z20_0, svint8x2_t, svuint8_t,
+                svtmopa_lane_za32_s8_u8 (0, z1, z3, z20, 0),
+                svtmopa_lane_za32 (0, z1, z3, z20, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s8_u8_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     sutmopa za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_u8_0_z0_z3_z19_0, svint8x2_t, svuint8_t,
+                svtmopa_lane_za32_s8_u8 (0, z0, z3, z19, 0),
+                svtmopa_lane_za32 (0, z0, z3, z19, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s8_u8_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     sutmopa za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_u8_0_z0_z3_z24_0, svint8x2_t, svuint8_t,
+                svtmopa_lane_za32_s8_u8 (0, z0, z3, z24, 0),
+                svtmopa_lane_za32 (0, z0, z3, z24, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s8_u8_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     sutmopa za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_u8_0_z0_z3_z27_0, svint8x2_t, svuint8_t,
+                svtmopa_lane_za32_s8_u8 (0, z0, z3, z27, 0),
+                svtmopa_lane_za32 (0, z0, z3, z27, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_s8_u8_0_z0_z3_z28_0:
+**     sutmopa za0\.s, {z0\.b - z1\.b}, z3\.b, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_s8_u8_0_z0_z3_z28_0, svint8x2_t, svuint8_t,
+                svtmopa_lane_za32_s8_u8 (0, z0, z3, z28, 0),
+                svtmopa_lane_za32 (0, z0, z3, z28, 0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u16_u16.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u16_u16.c
new file mode 100644
index 00000000000..a871cbc1aef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u16_u16.c
@@ -0,0 +1,76 @@
+/* { dg-do assemble { target aarch64_asm_sme-tmop_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-tmop_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop"
+
+/*
+** tmopa_lane_za32_u16_u16_0_z0_z3_z20_0:
+**     utmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u16_u16_0_z0_z3_z20_0, svuint16x2_t, svuint16_t,
+                svtmopa_lane_za32_u16_u16 (0, z0, z3, z20, 0),
+                svtmopa_lane_za32 (0, z0, z3, z20, 0))
+
+/* ZA slice and offset with maximum values.
+** tmopa_lane_za32_u16_u16_3_z2_z3_z20_3:
+**     utmopa  za3\.s, {z2\.h - z3\.h}, z3\.h, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u16_u16_3_z2_z3_z20_3, svuint16x2_t, svuint16_t,
+                svtmopa_lane_za32_u16_u16 (3, z2, z3, z20, 3),
+                svtmopa_lane_za32 (3, z2, z3, z20, 3))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za32_u16_u16_0_z1_z3_z20_0:
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     utmopa  za0\.s, {\1\.h - \2\.h}, z3\.h, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u16_u16_0_z1_z3_z20_0, svuint16x2_t, svuint16_t,
+                svtmopa_lane_za32_u16_u16 (0, z1, z3, z20, 0),
+                svtmopa_lane_za32 (0, z1, z3, z20, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u16_u16_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     utmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u16_u16_0_z0_z3_z19_0, svuint16x2_t, svuint16_t,
+                svtmopa_lane_za32_u16_u16 (0, z0, z3, z19, 0),
+                svtmopa_lane_za32 (0, z0, z3, z19, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u16_u16_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     utmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u16_u16_0_z0_z3_z24_0, svuint16x2_t, svuint16_t,
+                svtmopa_lane_za32_u16_u16 (0, z0, z3, z24, 0),
+                svtmopa_lane_za32 (0, z0, z3, z24, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u16_u16_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     utmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u16_u16_0_z0_z3_z27_0, svuint16x2_t, svuint16_t,
+                svtmopa_lane_za32_u16_u16 (0, z0, z3, z27, 0),
+                svtmopa_lane_za32 (0, z0, z3, z27, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u16_u16_0_z0_z3_z28_0:
+**     utmopa  za0\.s, {z0\.h - z1\.h}, z3\.h, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u16_u16_0_z0_z3_z28_0, svuint16x2_t, svuint16_t,
+                svtmopa_lane_za32_u16_u16 (0, z0, z3, z28, 0),
+                svtmopa_lane_za32 (0, z0, z3, z28, 0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u8_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u8_s8.c
new file mode 100644
index 00000000000..3d06044989a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u8_s8.c
@@ -0,0 +1,76 @@
+/* { dg-do assemble { target aarch64_asm_sme-tmop_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-tmop_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop"
+
+/*
+** tmopa_lane_za32_u8_s8_0_z0_z3_z20_0:
+**     ustmopa za0\.s, {z0\.b - z1\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_s8_0_z0_z3_z20_0, svuint8x2_t, svint8_t,
+                svtmopa_lane_za32_u8_s8 (0, z0, z3, z20, 0),
+                svtmopa_lane_za32 (0, z0, z3, z20, 0))
+
+/* ZA slice and offset with maximum values.
+** tmopa_lane_za32_u8_s8_3_z2_z3_z20_3:
+**     ustmopa za3\.s, {z2\.b - z3\.b}, z3\.b, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_s8_3_z2_z3_z20_3, svuint8x2_t, svint8_t,
+                svtmopa_lane_za32_u8_s8 (3, z2, z3, z20, 3),
+                svtmopa_lane_za32 (3, z2, z3, z20, 3))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za32_u8_s8_0_z1_z3_z20_0:
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     ustmopa za0\.s, {\1\.b - \2\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_s8_0_z1_z3_z20_0, svuint8x2_t, svint8_t,
+                svtmopa_lane_za32_u8_s8 (0, z1, z3, z20, 0),
+                svtmopa_lane_za32 (0, z1, z3, z20, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u8_s8_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     ustmopa za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_s8_0_z0_z3_z19_0, svuint8x2_t, svint8_t,
+                svtmopa_lane_za32_u8_s8 (0, z0, z3, z19, 0),
+                svtmopa_lane_za32 (0, z0, z3, z19, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u8_s8_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     ustmopa za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_s8_0_z0_z3_z24_0, svuint8x2_t, svint8_t,
+                svtmopa_lane_za32_u8_s8 (0, z0, z3, z24, 0),
+                svtmopa_lane_za32 (0, z0, z3, z24, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u8_s8_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     ustmopa za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_s8_0_z0_z3_z27_0, svuint8x2_t, svint8_t,
+                svtmopa_lane_za32_u8_s8 (0, z0, z3, z27, 0),
+                svtmopa_lane_za32 (0, z0, z3, z27, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u8_s8_0_z0_z3_z28_0:
+**     ustmopa za0\.s, {z0\.b - z1\.b}, z3\.b, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_s8_0_z0_z3_z28_0, svuint8x2_t, svint8_t,
+                svtmopa_lane_za32_u8_s8 (0, z0, z3, z28, 0),
+                svtmopa_lane_za32 (0, z0, z3, z28, 0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u8_u8.c 
b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u8_u8.c
new file mode 100644
index 00000000000..bd2519e0ea0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/tmopa_lane_za32_u8_u8.c
@@ -0,0 +1,76 @@
+/* { dg-do assemble { target aarch64_asm_sme-tmop_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-tmop_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-tmop"
+
+/*
+** tmopa_lane_za32_u8_u8_0_z0_z3_z20_0:
+**     utmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_u8_0_z0_z3_z20_0, svuint8x2_t, svuint8_t,
+                svtmopa_lane_za32_u8_u8 (0, z0, z3, z20, 0),
+                svtmopa_lane_za32 (0, z0, z3, z20, 0))
+
+/* ZA slice and offset with maximum values.
+** tmopa_lane_za32_u8_u8_3_z2_z3_z20_3:
+**     utmopa  za3\.s, {z2\.b - z3\.b}, z3\.b, z20\[3\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_u8_3_z2_z3_z20_3, svuint8x2_t, svuint8_t,
+                svtmopa_lane_za32_u8_u8 (3, z2, z3, z20, 3),
+                svtmopa_lane_za32 (3, z2, z3, z20, 3))
+
+/* The first register on the second argument must be even.
+** tmopa_lane_za32_u8_u8_0_z1_z3_z20_0:
+**     mov     (z\d+)\.d, z1\.d
+**     mov     (z\d+)\.d, z2\.d
+**     utmopa  za0\.s, {\1\.b - \2\.b}, z3\.b, z20\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_u8_0_z1_z3_z20_0, svuint8x2_t, svuint8_t,
+                svtmopa_lane_za32_u8_u8 (0, z1, z3, z20, 0),
+                svtmopa_lane_za32 (0, z1, z3, z20, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u8_u8_0_z0_z3_z19_0:
+**     mov     (z\d+).d, z19.d
+**     utmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_u8_0_z0_z3_z19_0, svuint8x2_t, svuint8_t,
+                svtmopa_lane_za32_u8_u8 (0, z0, z3, z19, 0),
+                svtmopa_lane_za32 (0, z0, z3, z19, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u8_u8_0_z0_z3_z24_0:
+**     mov     (z\d+).d, z24.d
+**     utmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_u8_0_z0_z3_z24_0, svuint8x2_t, svuint8_t,
+                svtmopa_lane_za32_u8_u8 (0, z0, z3, z24, 0),
+                svtmopa_lane_za32 (0, z0, z3, z24, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u8_u8_0_z0_z3_z27_0:
+**     mov     (z\d+).d, z27.d
+**     utmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, \1\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_u8_0_z0_z3_z27_0, svuint8x2_t, svuint8_t,
+                svtmopa_lane_za32_u8_u8 (0, z0, z3, z27, 0),
+                svtmopa_lane_za32 (0, z0, z3, z27, 0))
+
+/* zk register must be one of Z20-Z23 or Z28-Z31.
+** tmopa_lane_za32_u8_u8_0_z0_z3_z28_0:
+**     utmopa  za0\.s, {z0\.b - z1\.b}, z3\.b, z28\[0\]
+**     ret
+*/
+TEST_ZA_TMOP (tmopa_lane_za32_u8_u8_0_z0_z3_z28_0, svuint8x2_t, svuint8_t,
+                svtmopa_lane_za32_u8_u8 (0, z0, z3, z28, 0),
+                svtmopa_lane_za32 (0, z0, z3, z28, 0))
+
diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_za_uint_dual_single_1.c
 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_za_uint_dual_single_1.c
new file mode 100644
index 00000000000..f1b170bb70a
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_za_uint_dual_single_1.c
@@ -0,0 +1,87 @@
+/* { dg-do compile } */
+
+#include <arm_sme.h>
+
+#pragma GCC target ("arch=armv9-a+sme-tmop")
+
+void
+f1 (uint64_t u64,
+    svfloat32x2_t f32x2, svfloat32_t f32,
+    svfloat16x2_t f16x2, svfloat16_t f16,
+    svint8x2_t s8x2, svint8_t s8,
+    svuint8x2_t u8x2, svuint8_t u8,
+    svint16_t s16, svuint16_t u16)
+  __arm_streaming __arm_inout("za")
+{
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32, u8, 0);
+
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32, u8); /* { dg-error {too few 
arguments to function 'svtmopa_lane_za32_f32_f32'} } */
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32, u8, 0, 0); /* { dg-error {too many 
arguments to function 'svtmopa_lane_za32_f32_f32'} } */
+  svtmopa_lane_za32_f32_f32 (u64, f32x2, f32, u8, 0); /* { dg-error {argument 
1 of 'svtmopa_lane_za32_f32_f32' must be an integer constant expression} } */
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32, u8, u64); /* { dg-error {argument 
5 of 'svtmopa_lane_za32_f32_f32' must be an integer constant expression} } */
+
+  svtmopa_lane_za32_f32_f32 (-1, f32x2, f32, u8, 0); /* { dg-error {passing -1 
to argument 1 of 'svtmopa_lane_za32_f32_f32', which expects a value in the 
range \[0, 3\]} } */
+  svtmopa_lane_za32_f32_f32 (4, f32x2, f32, u8, 0); /* { dg-error {passing 4 
to argument 1 of 'svtmopa_lane_za32_f32_f32', which expects a value in the 
range \[0, 3\]} } */
+
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32, u8, -1); /* { dg-error {passing -1 
to argument 5 of 'svtmopa_lane_za32_f32_f32', which expects a value in the 
range \[0, 3\]} } */
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32, u8, 4); /* { dg-error {passing 4 
to argument 5 of 'svtmopa_lane_za32_f32_f32', which expects a value in the 
range \[0, 3\]} } */
+
+  svtmopa_lane_za32_f32_f32 (0, u8, f32, u8, 0); /* { dg-error {incompatible 
type for argument 2 of 'svtmopa_lane_za32_f32_f32'} } */
+  svtmopa_lane_za32_f32_f32 (0, f32, f32, u8, 0); /* { dg-error {incompatible 
type for argument 2 of 'svtmopa_lane_za32_f32_f32'} } */
+  svtmopa_lane_za32_f32_f32 (0, f16x2, f32, u8, 0); /* { dg-error 
{incompatible type for argument 2 of 'svtmopa_lane_za32_f32_f32'} } */
+
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f16, u8, 0); /* { dg-error 
{incompatible type for argument 3 of 'svtmopa_lane_za32_f32_f32'} } */
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32x2, u8, 0); /* { dg-error 
{incompatible type for argument 3 of 'svtmopa_lane_za32_f32_f32'} } */
+  svtmopa_lane_za32_f32_f32 (0, f32x2, u8, u8, 0); /* { dg-error {incompatible 
type for argument 3 of 'svtmopa_lane_za32_f32_f32'} } */
+
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32, u16, 0); /* { dg-error 
{incompatible type for argument 4 of 'svtmopa_lane_za32_f32_f32'} } */
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32, 0, 0); /* { dg-error {incompatible 
type for argument 4 of 'svtmopa_lane_za32_f32_f32'} } */
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32, f32, 0); /* { dg-error 
{incompatible type for argument 4 of 'svtmopa_lane_za32_f32_f32'} } */
+
+  svtmopa_lane_za32_s8_u8(0, s8x2, u8, u8, 0);
+  svtmopa_lane_za32_s8_u8(0, u8x2, u8, u8, 0); /* { dg-error {incompatible 
type for argument 2 of 'svtmopa_lane_za32_s8_u8'} } */
+  svtmopa_lane_za32_s8_u8(0, s8x2, s8, u8, 0); /* { dg-error {incompatible 
type for argument 3 of 'svtmopa_lane_za32_s8_u8'} } */
+  svtmopa_lane_za32_u8_s8(0, s8x2, s8, u8, 0); /* { dg-error {incompatible 
type for argument 2 of 'svtmopa_lane_za32_u8_s8'} } */
+  svtmopa_lane_za32_u8_s8(0, u8x2, u8, u8, 0); /* { dg-error {incompatible 
type for argument 3 of 'svtmopa_lane_za32_u8_s8'} } */
+}
+
+void
+f2 (svfloat32x2_t f32x2, svfloat32_t f32, svuint8_t u8) __arm_streaming
+{
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32, u8, 0); /* { dg-error {ACLE 
function 'svtmopa_lane_za32_f32_f32' can only be called from a function that 
has 'za' state} } */
+}
+
+void
+f3 (svfloat32x2_t f32x2, svfloat32_t f32, svuint8_t u8) __arm_inout("za")
+{
+  svtmopa_lane_za32_f32_f32 (0, f32x2, f32, u8, 0); /* { dg-error {ACLE 
function 'svtmopa_lane_za32_f32_f32' can only be called when SME streaming mode 
is enabled} } */
+}
+
+#pragma GCC target ("arch=armv9-a+sme-tmop+sme-f8f16")
+
+void
+f4 (svmfloat8x2_t mf8x2, svmfloat8_t mf8, svuint8_t u8, fpm_t fpm)
+  __arm_streaming __arm_inout("za")
+{
+
+  svtmopa_lane_za16_mf8_mf8_fpm (0, mf8x2, mf8, u8); /* { dg-error {too few 
arguments to function 'svtmopa_lane_za16_mf8_mf8_fpm'} } */
+  svtmopa_lane_za16_mf8_mf8_fpm (0, mf8x2, mf8, u8, 0, 0, fpm); /* { dg-error 
{too many arguments to function 'svtmopa_lane_za16_mf8_mf8_fpm'} } */
+  svtmopa_lane_za16_mf8_mf8_fpm (-1, mf8x2, mf8, u8, 0, fpm); /* { dg-error 
{passing -1 to argument 1 of 'svtmopa_lane_za16_mf8_mf8_fpm', which expects a 
value in the range \[0, 1\]} } */
+  svtmopa_lane_za16_mf8_mf8_fpm (2,  mf8x2, mf8, u8, 0, fpm); /* { dg-error 
{passing 2 to argument 1 of 'svtmopa_lane_za16_mf8_mf8_fpm', which expects a 
value in the range \[0, 1\]} } */
+  svtmopa_lane_za16_mf8_mf8_fpm (0, mf8x2, mf8, u8, 0, mf8); /* { dg-error 
{incompatible type for argument 6 of 'svtmopa_lane_za16_mf8_mf8_fpm'} } */
+}
+
+#pragma GCC target ("arch=armv9-a+sme-tmop+sme-f16f16")
+
+void
+f5 (svfloat16x2_t f16x2, svfloat16_t f16,
+    svuint8_t u8)
+  __arm_streaming __arm_inout("za")
+{
+  svtmopa_lane_za16_f16_f16 (-1, f16x2, f16, u8, 0); /* { dg-error {passing -1 
to argument 1 of 'svtmopa_lane_za16_f16_f16', which expects a value in the 
range \[0, 1\]} } */
+  svtmopa_lane_za16_f16_f16 (2, f16x2, f16, u8, 0); /* { dg-error {passing 2 
to argument 1 of 'svtmopa_lane_za16_f16_f16', which expects a value in the 
range \[0, 1\]} } */
+
+  svtmopa_lane_za16_f16_f16 (1, f16x2, f16, u8, -1); /* { dg-error {passing -1 
to argument 5 of 'svtmopa_lane_za16_f16_f16', which expects a value in the 
range \[0, 3\]} } */
+  svtmopa_lane_za16_f16_f16 (1, f16x2, f16, u8, 4); /* { dg-error {passing 4 
to argument 5 of 'svtmopa_lane_za16_f16_f16', which expects a value in the 
range \[0, 3\]} } */
+}
+
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 2b450669c3d..066ef42d440 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -12683,6 +12683,7 @@ set exts_sve2 {
     "sme-f8f16" "sme-f8f32"
     "sme-b16b16" "sme-f16f16" "sme-i16i64" "sme" "sme2" "sme2p1"
     "ssve-fp8dot2" "ssve-fp8dot4" "ssve-fp8fma"
+    "sme-tmop"
 }
 
 foreach { aarch64_ext } $exts {
-- 
2.51.0


Reply via email to