DavidSpickett updated this revision to Diff 550289.
DavidSpickett added a comment.

Replace missing process variable in TestZAThreadedDynamic.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D157883/new/

https://reviews.llvm.org/D157883

Files:
  lldb/include/lldb/Utility/RegisterValue.h
  lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
  lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
  lldb/source/Plugins/Process/Utility/LinuxPTraceDefines_arm64sve.h
  lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp
  lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h
  lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
  lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
  lldb/source/Plugins/Process/elf-core/RegisterUtilities.h
  lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp
  lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.h
  lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
  lldb/source/Target/DynamicRegisterInfo.cpp
  lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py
  lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
  lldb/test/API/commands/register/register/aarch64_za_reg/za_dynamic_resize/Makefile
  lldb/test/API/commands/register/register/aarch64_za_reg/za_dynamic_resize/TestZAThreadedDynamic.py
  lldb/test/API/commands/register/register/aarch64_za_reg/za_dynamic_resize/main.c
  lldb/test/API/commands/register/register/aarch64_za_reg/za_save_restore/Makefile
  lldb/test/API/commands/register/register/aarch64_za_reg/za_save_restore/TestZARegisterSaveRestore.py
  lldb/test/API/commands/register/register/aarch64_za_reg/za_save_restore/main.c

Index: lldb/test/API/commands/register/register/aarch64_za_reg/za_save_restore/main.c
===================================================================
--- /dev/null
+++ lldb/test/API/commands/register/register/aarch64_za_reg/za_save_restore/main.c
@@ -0,0 +1,225 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+// Important details for this program:
+// * Making a syscall will disable streaming mode if it is active.
+// * Changing the vector length will make streaming mode and ZA inactive.
+// * ZA can be active independent of streaming mode.
+// * ZA's size is the streaming vector length squared.
+
+#ifndef PR_SME_SET_VL
+#define PR_SME_SET_VL 63
+#endif
+
+#ifndef PR_SME_GET_VL
+#define PR_SME_GET_VL 64
+#endif
+
+#ifndef PR_SME_VL_LEN_MASK
+#define PR_SME_VL_LEN_MASK 0xffff
+#endif
+
+#define SM_INST(c) asm volatile("msr s0_3_c4_c" #c "_3, xzr")
+#define SMSTART SM_INST(7)
+#define SMSTART_SM SM_INST(3)
+#define SMSTART_ZA SM_INST(5)
+#define SMSTOP SM_INST(6)
+#define SMSTOP_SM SM_INST(2)
+#define SMSTOP_ZA SM_INST(4)
+
+int start_vl = 0;
+int other_vl = 0;
+
+void write_sve_regs() {
+  // We assume the smefa64 feature is present, which allows ffr access
+  // in streaming mode.
+  asm volatile("setffr\n\t");
+  asm volatile("ptrue p0.b\n\t");
+  asm volatile("ptrue p1.h\n\t");
+  asm volatile("ptrue p2.s\n\t");
+  asm volatile("ptrue p3.d\n\t");
+  asm volatile("pfalse p4.b\n\t");
+  asm volatile("ptrue p5.b\n\t");
+  asm volatile("ptrue p6.h\n\t");
+  asm volatile("ptrue p7.s\n\t");
+  asm volatile("ptrue p8.d\n\t");
+  asm volatile("pfalse p9.b\n\t");
+  asm volatile("ptrue p10.b\n\t");
+  asm volatile("ptrue p11.h\n\t");
+  asm volatile("ptrue p12.s\n\t");
+  asm volatile("ptrue p13.d\n\t");
+  asm volatile("pfalse p14.b\n\t");
+  asm volatile("ptrue p15.b\n\t");
+
+  asm volatile("cpy  z0.b, p0/z, #1\n\t");
+  asm volatile("cpy  z1.b, p5/z, #2\n\t");
+  asm volatile("cpy  z2.b, p10/z, #3\n\t");
+  asm volatile("cpy  z3.b, p15/z, #4\n\t");
+  asm volatile("cpy  z4.b, p0/z, #5\n\t");
+  asm volatile("cpy  z5.b, p5/z, #6\n\t");
+  asm volatile("cpy  z6.b, p10/z, #7\n\t");
+  asm volatile("cpy  z7.b, p15/z, #8\n\t");
+  asm volatile("cpy  z8.b, p0/z, #9\n\t");
+  asm volatile("cpy  z9.b, p5/z, #10\n\t");
+  asm volatile("cpy  z10.b, p10/z, #11\n\t");
+  asm volatile("cpy  z11.b, p15/z, #12\n\t");
+  asm volatile("cpy  z12.b, p0/z, #13\n\t");
+  asm volatile("cpy  z13.b, p5/z, #14\n\t");
+  asm volatile("cpy  z14.b, p10/z, #15\n\t");
+  asm volatile("cpy  z15.b, p15/z, #16\n\t");
+  asm volatile("cpy  z16.b, p0/z, #17\n\t");
+  asm volatile("cpy  z17.b, p5/z, #18\n\t");
+  asm volatile("cpy  z18.b, p10/z, #19\n\t");
+  asm volatile("cpy  z19.b, p15/z, #20\n\t");
+  asm volatile("cpy  z20.b, p0/z, #21\n\t");
+  asm volatile("cpy  z21.b, p5/z, #22\n\t");
+  asm volatile("cpy  z22.b, p10/z, #23\n\t");
+  asm volatile("cpy  z23.b, p15/z, #24\n\t");
+  asm volatile("cpy  z24.b, p0/z, #25\n\t");
+  asm volatile("cpy  z25.b, p5/z, #26\n\t");
+  asm volatile("cpy  z26.b, p10/z, #27\n\t");
+  asm volatile("cpy  z27.b, p15/z, #28\n\t");
+  asm volatile("cpy  z28.b, p0/z, #29\n\t");
+  asm volatile("cpy  z29.b, p5/z, #30\n\t");
+  asm volatile("cpy  z30.b, p10/z, #31\n\t");
+  asm volatile("cpy  z31.b, p15/z, #32\n\t");
+}
+
+// Write something different so we will know if we didn't restore them
+// correctly.
+void write_sve_regs_expr() {
+  asm volatile("pfalse p0.b\n\t");
+  asm volatile("wrffr p0.b\n\t");
+  asm volatile("pfalse p1.b\n\t");
+  asm volatile("pfalse p2.b\n\t");
+  asm volatile("pfalse p3.b\n\t");
+  asm volatile("ptrue p4.b\n\t");
+  asm volatile("pfalse p5.b\n\t");
+  asm volatile("pfalse p6.b\n\t");
+  asm volatile("pfalse p7.b\n\t");
+  asm volatile("pfalse p8.b\n\t");
+  asm volatile("ptrue p9.b\n\t");
+  asm volatile("pfalse p10.b\n\t");
+  asm volatile("pfalse p11.b\n\t");
+  asm volatile("pfalse p12.b\n\t");
+  asm volatile("pfalse p13.b\n\t");
+  asm volatile("ptrue p14.b\n\t");
+  asm volatile("pfalse p15.b\n\t");
+
+  asm volatile("cpy  z0.b, p0/z, #2\n\t");
+  asm volatile("cpy  z1.b, p5/z, #3\n\t");
+  asm volatile("cpy  z2.b, p10/z, #4\n\t");
+  asm volatile("cpy  z3.b, p15/z, #5\n\t");
+  asm volatile("cpy  z4.b, p0/z, #6\n\t");
+  asm volatile("cpy  z5.b, p5/z, #7\n\t");
+  asm volatile("cpy  z6.b, p10/z, #8\n\t");
+  asm volatile("cpy  z7.b, p15/z, #9\n\t");
+  asm volatile("cpy  z8.b, p0/z, #10\n\t");
+  asm volatile("cpy  z9.b, p5/z, #11\n\t");
+  asm volatile("cpy  z10.b, p10/z, #12\n\t");
+  asm volatile("cpy  z11.b, p15/z, #13\n\t");
+  asm volatile("cpy  z12.b, p0/z, #14\n\t");
+  asm volatile("cpy  z13.b, p5/z, #15\n\t");
+  asm volatile("cpy  z14.b, p10/z, #16\n\t");
+  asm volatile("cpy  z15.b, p15/z, #17\n\t");
+  asm volatile("cpy  z16.b, p0/z, #18\n\t");
+  asm volatile("cpy  z17.b, p5/z, #19\n\t");
+  asm volatile("cpy  z18.b, p10/z, #20\n\t");
+  asm volatile("cpy  z19.b, p15/z, #21\n\t");
+  asm volatile("cpy  z20.b, p0/z, #22\n\t");
+  asm volatile("cpy  z21.b, p5/z, #23\n\t");
+  asm volatile("cpy  z22.b, p10/z, #24\n\t");
+  asm volatile("cpy  z23.b, p15/z, #25\n\t");
+  asm volatile("cpy  z24.b, p0/z, #26\n\t");
+  asm volatile("cpy  z25.b, p5/z, #27\n\t");
+  asm volatile("cpy  z26.b, p10/z, #28\n\t");
+  asm volatile("cpy  z27.b, p15/z, #29\n\t");
+  asm volatile("cpy  z28.b, p0/z, #30\n\t");
+  asm volatile("cpy  z29.b, p5/z, #31\n\t");
+  asm volatile("cpy  z30.b, p10/z, #32\n\t");
+  asm volatile("cpy  z31.b, p15/z, #33\n\t");
+}
+
+void set_za_register(int svl, int value_offset) {
+#define MAX_VL_BYTES 256
+  uint8_t data[MAX_VL_BYTES];
+
+  // ldr za will actually wrap the selected vector row index by the number of
+  // rows you have, so setting a row that doesn't exist would actually set one
+  // that does. That's why we need the streaming vector length here.
+  for (int i = 0; i < svl; ++i) {
+    memset(data, i + value_offset, MAX_VL_BYTES);
+    // Each one of these loads a VL sized row of ZA.
+    asm volatile("mov w12, %w0\n\t"
+                 "ldr za[w12, 0], [%1]\n\t" ::"r"(i),
+                 "r"(&data)
+                 : "w12");
+  }
+}
+
+void expr_disable_za() {
+  SMSTOP_ZA;
+  write_sve_regs_expr();
+}
+
+void expr_enable_za() {
+  SMSTART_ZA;
+  set_za_register(start_vl, 2);
+  write_sve_regs_expr();
+}
+
+void expr_start_vl() {
+  prctl(PR_SME_SET_VL, start_vl);
+  SMSTART_ZA;
+  set_za_register(start_vl, 4);
+  write_sve_regs_expr();
+}
+
+void expr_other_vl() {
+  prctl(PR_SME_SET_VL, other_vl);
+  SMSTART_ZA;
+  set_za_register(other_vl, 5);
+  write_sve_regs_expr();
+}
+
+void expr_enable_sm() {
+  SMSTART_SM;
+  write_sve_regs_expr();
+}
+
+void expr_disable_sm() {
+  SMSTOP_SM;
+  write_sve_regs_expr();
+}
+
+int main(int argc, char *argv[]) {
+  // We expect to get:
+  // * whether to enable streaming mode
+  // * whether to enable ZA
+  // * what the starting VL should be
+  // * what the other VL should be
+  if (argc != 5)
+    return 1;
+
+  bool ssve = argv[1][0] == '1';
+  bool za = argv[2][0] == '1';
+  start_vl = atoi(argv[3]);
+  other_vl = atoi(argv[4]);
+
+  prctl(PR_SME_SET_VL, start_vl);
+
+  if (ssve)
+    SMSTART_SM;
+
+  if (za) {
+    SMSTART_ZA;
+    set_za_register(start_vl, 1);
+  }
+
+  write_sve_regs();
+
+  return 0; // Set a break point here.
+}
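
For reference, ZA holds SVL x SVL bytes, where SVL is the streaming vector
length in bytes (svg * 8). A minimal, illustrative sketch of that arithmetic
for the vector lengths the tests probe (not part of the patch itself):

# Illustrative sketch: expected ZA sizes for the svg values the tests try to
# select (svg, like vg, is in units of 8 bytes).
for svg in (2, 4, 8):
    svl_bytes = svg * 8               # 16, 32, 64 bytes (128/256/512 bit SVL)
    za_bytes = svl_bytes * svl_bytes  # ZA is an SVL x SVL byte array
    print("svg={} svl={} za={} bytes".format(svg, svl_bytes, za_bytes))
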
Index: lldb/test/API/commands/register/register/aarch64_za_reg/za_save_restore/TestZARegisterSaveRestore.py
===================================================================
--- /dev/null
+++ lldb/test/API/commands/register/register/aarch64_za_reg/za_save_restore/TestZARegisterSaveRestore.py
@@ -0,0 +1,237 @@
+"""
+Test that the AArch64 SME ZA register is saved and restored around expressions.
+
+This attempts to cover expressions that change the following:
+* ZA enabled or not.
+* Streaming mode or not.
+* Streaming vector length (increasing and decreasing).
+* Some combinations of the above.
+"""
+
+from enum import IntEnum
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+# These enum values match the flag values used in the test program.
+class Mode(IntEnum):
+    SVE = 0
+    SSVE = 1
+
+
+class ZA(IntEnum):
+    Disabled = 0
+    Enabled = 1
+
+
+class AArch64ZATestCase(TestBase):
+    def get_supported_svg(self):
+        # Always build this probe program so that it starts in streaming SVE mode.
+        # We read/write "vg" here, but since we are in streaming mode, "svg" is
+        # really what we are writing ("svg" itself is a read-only pseudo register).
+        self.build()
+
+        exe = self.getBuildArtifact("a.out")
+        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+        # Enter streaming mode, don't enable ZA, start_vl and other_vl don't
+        # matter here.
+        self.runCmd("settings set target.run-args 1 0 0 0")
+
+        stop_line = line_number("main.c", "// Set a break point here.")
+        lldbutil.run_break_set_by_file_and_line(self, "main.c", stop_line,
+                                                num_expected_locations=1)
+
+        self.runCmd("run", RUN_SUCCEEDED)
+
+        self.expect(
+            "thread info 1",
+            STOPPED_DUE_TO_BREAKPOINT,
+            substrs=["stop reason = breakpoint"],
+        )
+
+        # Write back the current vg to confirm read/write works at all.
+        current_svg = self.match("register read vg", ["(0x[0-9]+)"])
+        self.assertTrue(current_svg is not None)
+        self.expect("register write vg {}".format(current_svg.group()))
+
+        # Aka 128, 256 and 512 bit.
+        supported_svg = []
+        for svg in [2, 4, 8]:
+            # This could mask other errors but writing vg is tested elsewhere
+            # so we assume the hardware rejected the value.
+            self.runCmd("register write vg {}".format(svg), check=False)
+            if not self.res.GetError():
+                supported_svg.append(svg)
+
+        self.runCmd("breakpoint delete 1")
+        self.runCmd("continue")
+
+        return supported_svg
+
+    def read_vg(self):
+        process = self.dbg.GetSelectedTarget().GetProcess()
+        registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
+        sve_registers = registerSets.GetFirstValueByName("Scalable Vector Extension Registers")
+        return sve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned()
+
+    def read_svg(self):
+        process = self.dbg.GetSelectedTarget().GetProcess()
+        registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
+        sve_registers = registerSets.GetFirstValueByName("Scalable Matrix Extension Registers")
+        return sve_registers.GetChildMemberWithName("svg").GetValueAsUnsigned()
+
+    def make_za_value(self, vl, generator):
+        # Generate a vector value string "{0x00 0x01....}".
+        rows = []
+        for row in range(vl):
+            byte = "0x{:02x}".format(generator(row))
+            rows.append(" ".join([byte]*vl))
+        return "{" + " ".join(rows) + "}"
+
+    def check_za(self, vl):
+        # We expect an increasing value starting at 1. Row 0=1, row 1 = 2, etc.
+        self.expect("register read za", substrs=[
+            self.make_za_value(vl, lambda row: row+1)])
+
+    def check_za_disabled(self, vl):
+        # When ZA is disabled, lldb will show ZA as all 0s.
+        self.expect("register read za", substrs=[
+            self.make_za_value(vl, lambda row: 0)])
+
+    def za_expr_test_impl(self, sve_mode, za_state, swap_start_vl):
+        if not self.isAArch64SME():
+            self.skipTest("SME must be present.")
+
+        supported_svg = self.get_supported_svg()
+        if len(supported_svg) < 2:
+            self.skipTest("Target must support at least 2 streaming vector lengths.")
+
+        # svg, like vg, is in units of 8 bytes.
+        start_vl = supported_svg[0] * 8
+        other_vl = supported_svg[1] * 8
+
+        if swap_start_vl:
+            start_vl, other_vl = other_vl, start_vl
+
+        self.line = line_number("main.c", "// Set a break point here.")
+
+        exe = self.getBuildArtifact("a.out")
+        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+        self.runCmd("settings set target.run-args {} {} {} {}".format(sve_mode,
+                    za_state, start_vl, other_vl))
+
+        lldbutil.run_break_set_by_file_and_line(
+            self, "main.c", self.line, num_expected_locations=1
+        )
+        self.runCmd("run", RUN_SUCCEEDED)
+
+        self.expect(
+            "thread backtrace",
+            STOPPED_DUE_TO_BREAKPOINT,
+            substrs=["stop reason = breakpoint 1."],
+        )
+
+        exprs = ["expr_disable_za", "expr_enable_za", "expr_start_vl",
+                 "expr_other_vl", "expr_enable_sm", "expr_disable_sm"]
+
+        # This may be the streaming or non-streaming vg. All that matters is
+        # that it is saved and restored, remaining constant throughout.
+        start_vg = self.read_vg()
+
+        # Check the SVE registers to make sure that the combination of scaling
+        # SVE and scaling ZA works properly. This is a brittle check, but when
+        # failures do happen they are likely to be catastrophic anyway.
+        sve_reg_names = "ffr {} {}".format(
+            " ".join(["z{}".format(n) for n in range(32)]),
+            " ".join(["p{}".format(n) for n in range(16)]))
+        self.runCmd("register read " + sve_reg_names)
+        sve_values = self.res.GetOutput()
+
+        def check_regs():
+            if za_state == ZA.Enabled:
+                self.check_za(start_vl)
+            else:
+                self.check_za_disabled(start_vl)
+
+            # svg and vg are in units of 8 bytes.
+            self.assertEqual(start_vl, self.read_svg()*8)
+            self.assertEqual(start_vg, self.read_vg())
+
+            self.expect("register read " + sve_reg_names, substrs=[sve_values])
+
+        for expr in exprs:
+            expr_cmd = "expression {}()".format(expr)
+
+            # We do this twice because there were issues in development where
+            # using data stored by a previous WriteAllRegisterValues would crash
+            # the second time around.
+            self.runCmd(expr_cmd)
+            check_regs()
+            self.runCmd(expr_cmd)
+            check_regs()
+
+        # Run them in sequence to make sure there is no state lingering between
+        # them after a restore.
+        for expr in exprs:
+            self.runCmd("expression {}()".format(expr))
+            check_regs()
+
+        for expr in reversed(exprs):
+            self.runCmd("expression {}()".format(expr))
+            check_regs()
+
+    # These tests start with the 1st supported SVL and change to the 2nd
+    # supported SVL as needed.
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_za_expr_ssve_za_enabled(self):
+        self.za_expr_test_impl(Mode.SSVE, ZA.Enabled, False)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_za_expr_ssve_za_disabled(self):
+        self.za_expr_test_impl(Mode.SSVE, ZA.Disabled, False)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_za_expr_sve_za_enabled(self):
+        self.za_expr_test_impl(Mode.SVE, ZA.Enabled, False)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_za_expr_sve_za_disabled(self):
+        self.za_expr_test_impl(Mode.SVE, ZA.Disabled, False)
+
+    # These tests start in the 2nd supported SVL and change to the 1st supported
+    # SVL as needed.
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_za_expr_ssve_za_enabled_different_vl(self):
+        self.za_expr_test_impl(Mode.SSVE, ZA.Enabled, True)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_za_expr_ssve_za_disabled_different_vl(self):
+        self.za_expr_test_impl(Mode.SSVE, ZA.Disabled, True)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_za_expr_sve_za_enabled_different_vl(self):
+        self.za_expr_test_impl(Mode.SVE, ZA.Enabled, True)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_za_expr_sve_za_disabled_different_vl(self):
+        self.za_expr_test_impl(Mode.SVE, ZA.Disabled, True)
Index: lldb/test/API/commands/register/register/aarch64_za_reg/za_save_restore/Makefile
===================================================================
--- /dev/null
+++ lldb/test/API/commands/register/register/aarch64_za_reg/za_save_restore/Makefile
@@ -0,0 +1,5 @@
+C_SOURCES := main.c
+
+CFLAGS_EXTRAS := -march=armv8-a+sve+sme
+
+include Makefile.rules
Index: lldb/test/API/commands/register/register/aarch64_za_reg/za_dynamic_resize/main.c
===================================================================
--- /dev/null
+++ lldb/test/API/commands/register/register/aarch64_za_reg/za_dynamic_resize/main.c
@@ -0,0 +1,102 @@
+#include <pthread.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+// Important notes for this test:
+// * Making a syscall will disable streaming mode.
+// * LLDB writing to vg while in streaming mode will disable ZA
+//   (this is just how ptrace works).
+// * Writing to an inactive ZA produces a SIGILL.
+
+#ifndef PR_SME_SET_VL
+#define PR_SME_SET_VL 63
+#endif
+
+#define SM_INST(c) asm volatile("msr s0_3_c4_c" #c "_3, xzr")
+#define SMSTART_SM SM_INST(3)
+#define SMSTART_ZA SM_INST(5)
+
+void set_za_register(int svl, int value_offset) {
+#define MAX_VL_BYTES 256
+  uint8_t data[MAX_VL_BYTES];
+
+  // ldr za will actually wrap the selected vector row index by the number of
+  // rows you have, so setting a row that doesn't exist would actually set one
+  // that does. That's why we need the streaming vector length here.
+  for (int i = 0; i < svl; ++i) {
+    memset(data, i + value_offset, MAX_VL_BYTES);
+    // Each one of these loads a VL sized row of ZA.
+    asm volatile("mov w12, %w0\n\t"
+                 "ldr za[w12, 0], [%1]\n\t" ::"r"(i),
+                 "r"(&data)
+                 : "w12");
+  }
+}
+
+// These are used to make sure we only break in each thread once both of the
+// threads have been started. Otherwise when the test does "process continue"
+// it could stop in one thread and wait forever for the other one to start.
+atomic_bool threadX_ready = false;
+atomic_bool threadY_ready = false;
+
+void *threadX_func(void *x_arg) {
+  threadX_ready = true;
+  while (!threadY_ready) {
+  }
+
+  prctl(PR_SME_SET_VL, 8 * 4);
+  SMSTART_SM;
+  SMSTART_ZA;
+  set_za_register(8 * 4, 2);
+  SMSTART_ZA; // Thread X breakpoint 1
+  set_za_register(8 * 2, 2);
+  return NULL; // Thread X breakpoint 2
+}
+
+void *threadY_func(void *y_arg) {
+  threadY_ready = true;
+  while (!threadX_ready) {
+  }
+
+  prctl(PR_SME_SET_VL, 8 * 2);
+  SMSTART_SM;
+  SMSTART_ZA;
+  set_za_register(8 * 2, 3);
+  SMSTART_ZA; // Thread Y breakpoint 1
+  set_za_register(8 * 4, 3);
+  return NULL; // Thread Y breakpoint 2
+}
+
+int main(int argc, char *argv[]) {
+  // Expecting an argument to tell us whether to enable ZA on the main thread.
+  if (argc != 2)
+    return 1;
+
+  prctl(PR_SME_SET_VL, 8 * 8);
+  SMSTART_SM;
+
+  if (argv[1][0] == '1') {
+    SMSTART_ZA;
+    set_za_register(8 * 8, 1);
+  }
+  // else we do not enable ZA and lldb will show 0s for it.
+
+  pthread_t x_thread;
+  if (pthread_create(&x_thread, NULL, threadX_func, 0)) // Break in main thread
+    return 1;
+
+  pthread_t y_thread;
+  if (pthread_create(&y_thread, NULL, threadY_func, 0))
+    return 1;
+
+  if (pthread_join(x_thread, NULL))
+    return 2;
+
+  if (pthread_join(y_thread, NULL))
+    return 2;
+
+  return 0;
+}
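
set_za_register() above fills each ZA row with a single byte: the row index
plus an offset that identifies which thread wrote it. A small illustrative
sketch of the resulting pattern, which the Python test below checks for
(hypothetical helper, not part of the patch):

# Each ZA row n is svl_bytes copies of (n + value_offset); e.g. the main
# thread uses svl 64 with offset 1, thread X uses svl 32 with offset 2.
def expected_za_rows(svl_bytes, value_offset):
    return [[row + value_offset] * svl_bytes for row in range(svl_bytes)]
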
Index: lldb/test/API/commands/register/register/aarch64_za_reg/za_dynamic_resize/TestZAThreadedDynamic.py
===================================================================
--- lldb/test/API/commands/register/register/aarch64_za_reg/za_dynamic_resize/TestZAThreadedDynamic.py
+++ lldb/test/API/commands/register/register/aarch64_za_reg/za_dynamic_resize/TestZAThreadedDynamic.py
@@ -1,11 +1,6 @@
 """
-Test the AArch64 SVE and Streaming SVE (SSVE) registers dynamic resize with
+Test the AArch64 SME Array Storage (ZA) register dynamic resize with
 multiple threads.
-
-This test assumes a minimum supported vector length (VL) of 256 bits
-and will test 512 bits if possible. We refer to "vg" which is the
-register shown in lldb. This is in units of 64 bits. 256 bit VL is
-the same as a vg of 4.
 """
 
 from enum import Enum
@@ -15,21 +10,15 @@
 from lldbsuite.test import lldbutil
 
 
-class Mode(Enum):
-    SVE = 0
-    SSVE = 1
-
-
-class RegisterCommandsTestCase(TestBase):
+class AArch64ZAThreadedTestCase(TestBase):
     def get_supported_vg(self):
-        # Changing VL trashes the register state, so we need to run the program
-        # just to test this. Then run it again for the test.
         exe = self.getBuildArtifact("a.out")
         self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
 
         main_thread_stop_line = line_number("main.c", "// Break in main thread")
         lldbutil.run_break_set_by_file_and_line(self, "main.c", main_thread_stop_line)
 
+        self.runCmd("settings set target.run-args 0")
         self.runCmd("run", RUN_SUCCEEDED)
 
         self.expect(
@@ -38,7 +27,6 @@
             substrs=["stop reason = breakpoint"],
         )
 
-        # Write back the current vg to confirm read/write works at all.
         current_vg = self.match("register read vg", ["(0x[0-9]+)"])
         self.assertTrue(current_vg is not None)
         self.expect("register write vg {}".format(current_vg.group()))
@@ -57,64 +45,36 @@
 
         return supported_vg
 
-    def check_sve_registers(self, vg_test_value):
-        z_reg_size = vg_test_value * 8
-        p_reg_size = int(z_reg_size / 8)
-
-        p_value_bytes = ["0xff", "0x55", "0x11", "0x01", "0x00"]
-
-        for i in range(32):
-            s_reg_value = "s%i = 0x" % (i) + "".join(
-                "{:02x}".format(i + 1) for _ in range(4)
-            )
-
-            d_reg_value = "d%i = 0x" % (i) + "".join(
-                "{:02x}".format(i + 1) for _ in range(8)
-            )
-
-            v_reg_value = "v%i = 0x" % (i) + "".join(
-                "{:02x}".format(i + 1) for _ in range(16)
-            )
-
-            z_reg_value = (
-                "{"
-                + " ".join("0x{:02x}".format(i + 1) for _ in range(z_reg_size))
-                + "}"
-            )
-
-            self.expect("register read -f hex " + "s%i" % (i), substrs=[s_reg_value])
+    def gen_za_value(self, svg, value_generator):
+        svl = svg*8
 
-            self.expect("register read -f hex " + "d%i" % (i), substrs=[d_reg_value])
+        rows = []
+        for row in range(svl):
+            byte = "0x{:02x}".format(value_generator(row))
+            rows.append(" ".join([byte]*svl))
 
-            self.expect("register read -f hex " + "v%i" % (i), substrs=[v_reg_value])
+        return "{" + " ".join(rows) + "}"
 
-            self.expect("register read " + "z%i" % (i), substrs=[z_reg_value])
+    def check_za_register(self, svg, value_offset):
+        self.expect("register read za", substrs=[
+            self.gen_za_value(svg, lambda r: r+value_offset)])
 
-        for i in range(16):
-            p_regs_value = (
-                "{" + " ".join(p_value_bytes[i % 5] for _ in range(p_reg_size)) + "}"
-            )
-            self.expect("register read " + "p%i" % (i), substrs=[p_regs_value])
+    def check_disabled_za_register(self, svg):
+        self.expect("register read za", substrs=[
+            self.gen_za_value(svg, lambda r: 0)])
 
-        self.expect("register read ffr", substrs=[p_regs_value])
-
-    def run_sve_test(self, mode):
-        if (mode == Mode.SVE) and not self.isAArch64SVE():
-            self.skipTest("SVE registers must be supported.")
-
-        if (mode == Mode.SSVE) and not self.isAArch64SME():
-            self.skipTest("Streaming SVE registers must be supported.")
-
-        cflags = "-march=armv8-a+sve -lpthread"
-        if mode == Mode.SSVE:
-            cflags += " -DUSE_SSVE"
-        self.build(dictionary={"CFLAGS_EXTRAS": cflags})
+    def za_test_impl(self, enable_za):
+        if not self.isAArch64SME():
+            self.skipTest("SME must be present.")
 
         self.build()
         supported_vg = self.get_supported_vg()
 
+        self.runCmd("settings set target.run-args {}".format(
+            '1' if enable_za else '0'))
+
         if not (2 in supported_vg and 4 in supported_vg):
-            self.skipTest("Not all required SVE vector lengths are supported.")
+            self.skipTest("Not all required streaming vector lengths are supported.")
 
         main_thread_stop_line = line_number("main.c", "// Break in main thread")
         lldbutil.run_break_set_by_file_and_line(self, "main.c", main_thread_stop_line)
@@ -133,8 +93,6 @@
 
         self.runCmd("run", RUN_SUCCEEDED)
 
-        process = self.dbg.GetSelectedTarget().GetProcess()
-
         self.expect(
             "thread info 1",
             STOPPED_DUE_TO_BREAKPOINT,
@@ -142,16 +100,19 @@
         )
 
         if 8 in supported_vg:
-            self.check_sve_registers(8)
+            if enable_za:
+                self.check_za_register(8, 1)
+            else:
+                self.check_disabled_za_register(8)
         else:
-            self.check_sve_registers(4)
+            if enable_za:
+                self.check_za_register(4, 1)
+            else:
+                self.check_disabled_za_register(4)
 
         self.runCmd("process continue", RUN_SUCCEEDED)
 
-        # If we start the checks too quickly, thread 3 may not have started.
-        while process.GetNumThreads() < 3:
-            pass
-
+        process = self.dbg.GetSelectedTarget().GetProcess()
         for idx in range(1, process.GetNumThreads()):
             thread = process.GetThreadAtIndex(idx)
             if thread.GetStopReason() != lldb.eStopReasonBreakpoint:
@@ -162,12 +123,12 @@
 
             if stopped_at_line_number == thX_break_line1:
                 self.runCmd("thread select %d" % (idx + 1))
-                self.check_sve_registers(4)
+                self.check_za_register(4, 2)
                 self.runCmd("register write vg 2")
 
             elif stopped_at_line_number == thY_break_line1:
                 self.runCmd("thread select %d" % (idx + 1))
-                self.check_sve_registers(2)
+                self.check_za_register(2, 3)
                 self.runCmd("register write vg 4")
 
         self.runCmd("thread continue 2")
@@ -181,22 +142,24 @@
 
             if stopped_at_line_number == thX_break_line2:
                 self.runCmd("thread select %d" % (idx + 1))
-                self.check_sve_registers(2)
+                self.check_za_register(2, 2)
 
             elif stopped_at_line_number == thY_break_line2:
                 self.runCmd("thread select %d" % (idx + 1))
-                self.check_sve_registers(4)
+                self.check_za_register(4, 3)
 
     @no_debug_info_test
     @skipIf(archs=no_match(["aarch64"]))
     @skipIf(oslist=no_match(["linux"]))
-    def test_sve_registers_dynamic_config(self):
-        """Test AArch64 SVE registers multi-threaded dynamic resize."""
-        self.run_sve_test(Mode.SVE)
+    def test_za_register_dynamic_config_main_enabled(self):
+        """ Test multiple threads resizing ZA, with the main thread's ZA
+            enabled."""
+        self.za_test_impl(True)
 
     @no_debug_info_test
     @skipIf(archs=no_match(["aarch64"]))
     @skipIf(oslist=no_match(["linux"]))
-    def test_ssve_registers_dynamic_config(self):
-        """Test AArch64 SSVE registers multi-threaded dynamic resize."""
-        self.run_sve_test(Mode.SSVE)
+    def test_za_register_dynamic_config_main_disabled(self):
+        """ Test multiple threads resizing ZA, with the main thread's ZA
+            disabled."""
+        self.za_test_impl(False)
\ No newline at end of file
Index: lldb/test/API/commands/register/register/aarch64_za_reg/za_dynamic_resize/Makefile
===================================================================
--- /dev/null
+++ lldb/test/API/commands/register/register/aarch64_za_reg/za_dynamic_resize/Makefile
@@ -0,0 +1,5 @@
+C_SOURCES := main.c
+
+CFLAGS_EXTRAS := -march=armv8-a+sve+sme -lpthread
+
+include Makefile.rules
Index: lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
===================================================================
--- lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
+++ lldb/test/API/commands/register/register/aarch64_sve_registers/rw_access_dynamic_resize/TestSVEThreadedDynamic.py
@@ -98,6 +98,12 @@
 
         self.expect("register read ffr", substrs=[p_regs_value])
 
+    def build_for_mode(self, mode):
+        cflags = "-march=armv8-a+sve -lpthread"
+        if mode == Mode.SSVE:
+            cflags += " -DUSE_SSVE"
+        self.build(dictionary={"CFLAGS_EXTRAS": cflags})
+
     def run_sve_test(self, mode):
         if (mode == Mode.SVE) and not self.isAArch64SVE():
             self.skipTest("SVE registers must be supported.")
@@ -105,12 +111,8 @@
         if (mode == Mode.SSVE) and not self.isAArch64SME():
             self.skipTest("Streaming SVE registers must be supported.")
 
-        cflags = "-march=armv8-a+sve -lpthread"
-        if mode == Mode.SSVE:
-            cflags += " -DUSE_SSVE"
-        self.build(dictionary={"CFLAGS_EXTRAS": cflags})
+        self.build_for_mode(mode)
 
-        self.build()
         supported_vg = self.get_supported_vg()
 
         if not (2 in supported_vg and 4 in supported_vg):
@@ -200,3 +202,99 @@
     def test_ssve_registers_dynamic_config(self):
         """Test AArch64 SSVE registers multi-threaded dynamic resize."""
         self.run_sve_test(Mode.SSVE)
+
+    def setup_svg_test(self, mode):
+        # Even when running in SVE mode, we need access to SVG for these tests.
+        if not self.isAArch64SME():
+            self.skipTest("Streaming SVE registers must be present.")
+
+        self.build_for_mode(mode)
+
+        supported_vg = self.get_supported_vg()
+
+        main_thread_stop_line = line_number("main.c", "// Break in main thread")
+        lldbutil.run_break_set_by_file_and_line(self, "main.c", main_thread_stop_line)
+
+        self.runCmd("run", RUN_SUCCEEDED)
+
+        process = self.dbg.GetSelectedTarget().GetProcess()
+
+        self.expect(
+            "thread info 1",
+            STOPPED_DUE_TO_BREAKPOINT,
+            substrs=["stop reason = breakpoint"],
+        )
+
+        target = self.dbg.GetSelectedTarget()
+        process = target.GetProcess()
+
+        return process, supported_vg
+
+    def read_reg(self, process, regset, reg):
+        registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
+        sve_registers = registerSets.GetFirstValueByName(regset)
+        return sve_registers.GetChildMemberWithName(reg).GetValueAsUnsigned()
+
+    def read_vg(self, process):
+        return self.read_reg(process, "Scalable Vector Extension Registers", "vg")
+
+    def read_svg(self, process):
+        return self.read_reg(process, "Scalable Matrix Extension Registers", "svg")
+
+    def do_svg_test(self, process, vgs, expected_svgs):
+        for vg, svg in zip(vgs, expected_svgs):
+            self.runCmd("register write vg {}".format(vg))
+            self.assertEqual(svg, self.read_svg(process))
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_svg_sve_mode(self):
+        """ When in SVE mode, svg should remain constant as we change vg. """
+        process, supported_vg = self.setup_svg_test(Mode.SVE)
+        svg = self.read_svg(process)
+        self.do_svg_test(process, supported_vg, [svg]*len(supported_vg))
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_svg_ssve_mode(self):
+        """ When in SSVE mode, changing vg should change svg to the same value. """
+        process, supported_vg = self.setup_svg_test(Mode.SSVE)
+        self.do_svg_test(process, supported_vg, supported_vg)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_sme_not_present(self):
+        """ When there is no SME, we should not show the SME register sets."""
+        if self.isAArch64SME():
+            self.skipTest("Streaming SVE registers must not be present.")
+
+        self.build_for_mode(Mode.SVE)
+
+        exe = self.getBuildArtifact("a.out")
+        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+
+        # This test may run on a non-sve system, but we'll stop before any
+        # SVE instruction would be run.
+        self.runCmd("b main")
+        self.runCmd("run", RUN_SUCCEEDED)
+
+        process = self.dbg.GetSelectedTarget().GetProcess()
+
+        self.expect(
+            "thread info 1",
+            STOPPED_DUE_TO_BREAKPOINT,
+            substrs=["stop reason = breakpoint"],
+        )
+
+        target = self.dbg.GetSelectedTarget()
+        process = target.GetProcess()
+
+        registerSets = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
+        sme_registers = registerSets.GetFirstValueByName("Scalable Matrix Extension Registers")
+        self.assertFalse(sme_registers.IsValid())
+
+        za = registerSets.GetFirstValueByName("Scalable Matrix Array Storage Registers")
+        self.assertFalse(za.IsValid())
Index: lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py
===================================================================
--- lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py
+++ lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py
@@ -70,15 +70,14 @@
         self.runCmd("register write ffr " + "'" + p_regs_value + "'")
         self.expect("register read ffr", substrs=[p_regs_value])
 
-    @no_debug_info_test
-    @skipIf(archs=no_match(["aarch64"]))
-    @skipIf(oslist=no_match(["linux"]))
-    def test_aarch64_dynamic_regset_config(self):
-        """Test AArch64 Dynamic Register sets configuration."""
+
+    def setup_register_config_test(self, run_args=None):
         self.build()
         self.line = line_number("main.c", "// Set a break point here.")
 
         exe = self.getBuildArtifact("a.out")
+        if run_args is not None:
+            self.runCmd("settings set target.run-args " + run_args)
         self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
 
         lldbutil.run_break_set_by_file_and_line(
@@ -97,7 +96,16 @@
         thread = process.GetThreadAtIndex(0)
         currentFrame = thread.GetFrameAtIndex(0)
 
-        for registerSet in currentFrame.GetRegisters():
+        return currentFrame.GetRegisters()
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_aarch64_dynamic_regset_config(self):
+        """Test AArch64 Dynamic Register sets configuration."""
+        register_sets = self.setup_register_config_test()
+
+        for registerSet in register_sets:
             if "Scalable Vector Extension Registers" in registerSet.GetName():
                 self.assertTrue(
                     self.isAArch64SVE(),
@@ -120,6 +128,20 @@
                 )
                 self.expect("register read data_mask", substrs=["data_mask = 0x"])
                 self.expect("register read code_mask", substrs=["code_mask = 0x"])
+            if "Scalable Matrix Extension Registers" in registerSet.GetName():
+                self.assertTrue(self.isAArch64SME(),
+                    "LLDB Enabled SME register set when it was disabled by target")
+            if "Scalable Matrix Array Storage Registers" in registerSet.GetName():
+                self.assertTrue(self.isAArch64SME(),
+                    "LLDB Enabled SME array storage register set when it was disabled by target.")
+
+    def make_za_value(self, vl, generator):
+        # Generate a vector value string "{0x00 0x01....}".
+        rows = []
+        for row in range(vl):
+            byte = "0x{:02x}".format(generator(row))
+            rows.append(" ".join([byte]*vl))
+        return "{" + " ".join(rows) + "}"
 
     @no_debug_info_test
     @skipIf(archs=no_match(["aarch64"]))
@@ -130,32 +152,58 @@
         if not self.isAArch64SME():
             self.skipTest("SME must be present.")
 
-        self.build()
-        self.line = line_number("main.c", "// Set a break point here.")
-
-        exe = self.getBuildArtifact("a.out")
-        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
-
-        lldbutil.run_break_set_by_file_and_line(
-            self, "main.c", self.line, num_expected_locations=1
-        )
-        self.runCmd("settings set target.run-args sme")
-        self.runCmd("run", RUN_SUCCEEDED)
-
-        self.expect(
-            "thread backtrace",
-            STOPPED_DUE_TO_BREAKPOINT,
-            substrs=["stop reason = breakpoint 1."],
-        )
-
-        target = self.dbg.GetSelectedTarget()
-        process = target.GetProcess()
-        thread = process.GetThreadAtIndex(0)
-        currentFrame = thread.GetFrameAtIndex(0)
-
-        register_sets = currentFrame.GetRegisters()
+        register_sets = self.setup_register_config_test("sme")
 
         ssve_registers = register_sets.GetFirstValueByName(
             "Scalable Vector Extension Registers")
         self.assertTrue(ssve_registers.IsValid())
         self.sve_regs_read_dynamic(ssve_registers)
+
+        za_register = register_sets.GetFirstValueByName(
+            "Scalable Matrix Array Storage Registers")
+        self.assertTrue(za_register.IsValid())
+        vg = ssve_registers.GetChildMemberWithName("vg").GetValueAsUnsigned()
+        vl = vg * 8
+        # When first enabled it is all 0s.
+        self.expect("register read za", substrs=[self.make_za_value(vl, lambda r: 0)])
+        za_value = self.make_za_value(vl, lambda r:r+1)
+        self.runCmd("register write za '{}'".format(za_value))
+        self.expect("register read za", substrs=[za_value])
+
+        # SVG should match VG because we're in streaming mode.
+        sme_registers = register_sets.GetFirstValueByName(
+            "Scalable Matrix Extension Registers")
+        self.assertTrue(sme_registers.IsValid())
+        svg = sme_registers.GetChildMemberWithName("svg").GetValueAsUnsigned()
+        self.assertEqual(vg, svg)
+
+    @no_debug_info_test
+    @skipIf(archs=no_match(["aarch64"]))
+    @skipIf(oslist=no_match(["linux"]))
+    def test_aarch64_dynamic_regset_config_sme_za_disabled(self):
+        """Test that ZA shows as 0s when disabled and can be enabled by writing
+           to it."""
+        if not self.isAArch64SME():
+            self.skipTest("SME must be present.")
+
+        # No argument, so ZA will be disabled when we break.
+        register_sets = self.setup_register_config_test()
+
+        # vg reflects the non-streaming vector length because we are not in
+        # streaming mode, so we need to use svg instead.
+        sme_registers = register_sets.GetFirstValueByName(
+            "Scalable Matrix Extension Registers")
+        self.assertTrue(sme_registers.IsValid())
+        svg = sme_registers.GetChildMemberWithName("svg").GetValueAsUnsigned()
+
+        za_register = register_sets.GetFirstValueByName(
+            "Scalable Matrix Array Storage Registers")
+        self.assertTrue(za_register.IsValid())
+        svl = svg * 8
+        # A disabled ZA is shown as all 0s.
+        self.expect("register read za", substrs=[self.make_za_value(svl, lambda r: 0)])
+        za_value = self.make_za_value(svl, lambda r:r+1)
+        # Writing to it enables ZA, so the value should be there when we read
+        # it back.
+        self.runCmd("register write za '{}'".format(za_value))
+        self.expect("register read za", substrs=[za_value])
Index: lldb/source/Target/DynamicRegisterInfo.cpp
===================================================================
--- lldb/source/Target/DynamicRegisterInfo.cpp
+++ lldb/source/Target/DynamicRegisterInfo.cpp
@@ -614,10 +614,11 @@
   ConfigureOffsets();
 
   // Check if register info is reconfigurable
-  // AArch64 SVE register set has configurable register sizes
+  // AArch64 SVE register set has configurable register sizes, as does the ZA
+  // register that SME added (the streaming state of SME reuses the SVE state).
   if (arch.GetTriple().isAArch64()) {
     for (const auto &reg : m_regs) {
-      if (strcmp(reg.name, "vg") == 0) {
+      if ((strcmp(reg.name, "vg") == 0) || (strcmp(reg.name, "svg") == 0)) {
         m_is_reconfigurable = true;
         break;
       }
Index: lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
===================================================================
--- lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
+++ lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
@@ -1663,17 +1663,19 @@
     gdb_thread->PrivateSetRegisterValue(lldb_regnum, buffer_sp->GetData());
   }
 
-  // AArch64 SVE specific code below calls AArch64SVEReconfigure to update
-  // SVE register sizes and offsets if value of VG register has changed
-  // since last stop.
+  // AArch64 SVE/SME specific code below updates SVE and ZA register sizes
+  // and offsets if the VG or SVG value has changed since the last stop.
   const ArchSpec &arch = GetTarget().GetArchitecture();
   if (arch.IsValid() && arch.GetTriple().isAArch64()) {
     GDBRemoteRegisterContext *reg_ctx_sp =
         static_cast<GDBRemoteRegisterContext *>(
             gdb_thread->GetRegisterContext().get());
 
-    if (reg_ctx_sp)
+    if (reg_ctx_sp) {
       reg_ctx_sp->AArch64SVEReconfigure();
+      reg_ctx_sp->AArch64SMEReconfigure();
+      reg_ctx_sp->InvalidateAllRegisters();
+    }
   }
 
   thread_sp->SetName(thread_name.empty() ? nullptr : thread_name.c_str());
Index: lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.h
===================================================================
--- lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.h
+++ lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.h
@@ -39,6 +39,7 @@
   ~GDBRemoteDynamicRegisterInfo() override = default;
 
   void UpdateARM64SVERegistersInfos(uint64_t vg);
+  void UpdateARM64SMERegistersInfos(uint64_t vg);
 };
 
 class GDBRemoteRegisterContext : public RegisterContext {
@@ -77,7 +78,9 @@
   uint32_t ConvertRegisterKindToRegisterNumber(lldb::RegisterKind kind,
                                                uint32_t num) override;
 
-  bool AArch64SVEReconfigure();
+  void AArch64SVEReconfigure();
+
+  void AArch64SMEReconfigure();
 
 protected:
   friend class ThreadGDBRemote;
Index: lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp
===================================================================
--- lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp
+++ lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.cpp
@@ -373,14 +373,14 @@
   if (dst == nullptr)
     return false;
 
-  // Code below is specific to AArch64 target in SVE state
+  // Code below is specific to AArch64 targets in SVE or SME state.
   // If vector granule (vg) register is being written then thread's
   // register context reconfiguration is triggered on success.
-  bool do_reconfigure_arm64_sve = false;
+  // We do not allow writes to SVG so it is not mentioned here.
   const ArchSpec &arch = process->GetTarget().GetArchitecture();
-  if (arch.IsValid() && arch.GetTriple().isAArch64())
-    if (strcmp(reg_info->name, "vg") == 0)
-      do_reconfigure_arm64_sve = true;
+  bool do_reconfigure_arm64_sve = arch.IsValid() &&
+                                  arch.GetTriple().isAArch64() &&
+                                  (strcmp(reg_info->name, "vg") == 0);
 
   if (data.CopyByteOrderedData(data_offset,                // src offset
                                reg_info->byte_size,        // src length
@@ -400,10 +400,12 @@
                 {m_reg_data.GetDataStart(), size_t(m_reg_data.GetByteSize())}))
 
         {
-          SetAllRegisterValid(false);
-
-          if (do_reconfigure_arm64_sve)
+          if (do_reconfigure_arm64_sve) {
             AArch64SVEReconfigure();
+            AArch64SMEReconfigure();
+          }
+
+          InvalidateAllRegisters();
 
           return true;
         }
@@ -435,8 +437,11 @@
           // This is an actual register, write it
           success = SetPrimordialRegister(reg_info, gdb_comm);
 
-          if (success && do_reconfigure_arm64_sve)
+          if (success && do_reconfigure_arm64_sve) {
             AArch64SVEReconfigure();
+            AArch64SMEReconfigure();
+            InvalidateAllRegisters();
+          }
         }
 
         // Check if writing this register will invalidate any other register
@@ -760,37 +765,51 @@
   return m_reg_info_sp->ConvertRegisterKindToRegisterNumber(kind, num);
 }
 
-bool GDBRemoteRegisterContext::AArch64SVEReconfigure() {
-  if (!m_reg_info_sp)
-    return false;
-
+void GDBRemoteRegisterContext::AArch64SVEReconfigure() {
+  assert(m_reg_info_sp);
   const RegisterInfo *reg_info = m_reg_info_sp->GetRegisterInfo("vg");
-  if (!reg_info)
-    return false;
+  assert(reg_info);
 
   uint64_t fail_value = LLDB_INVALID_ADDRESS;
   uint32_t vg_reg_num = reg_info->kinds[eRegisterKindLLDB];
   uint64_t vg_reg_value = ReadRegisterAsUnsigned(vg_reg_num, fail_value);
 
   if (vg_reg_value == fail_value || vg_reg_value > 32)
-    return false;
+    return;
 
   reg_info = m_reg_info_sp->GetRegisterInfo("p0");
   // Predicate registers have 1 bit per byte in the vector so their size is
   // VL / 8. VG is in units of 8 bytes already, so if the size of p0 == VG
   // already, we do not have to reconfigure.
   if (!reg_info || vg_reg_value == reg_info->byte_size)
-    return false;
+    return;
 
   m_reg_info_sp->UpdateARM64SVERegistersInfos(vg_reg_value);
   // Make a heap based buffer that is big enough to store all registers
   m_reg_data.SetData(std::make_shared<DataBufferHeap>(
       m_reg_info_sp->GetRegisterDataByteSize(), 0));
   m_reg_data.SetByteOrder(GetByteOrder());
+}
 
-  InvalidateAllRegisters();
+void GDBRemoteRegisterContext::AArch64SMEReconfigure() {
+  assert(m_reg_info_sp);
+  const RegisterInfo *reg_info = m_reg_info_sp->GetRegisterInfo("svg");
+  // Target does not have SME, nothing for us to reconfigure.
+  if (!reg_info)
+    return;
 
-  return true;
+  uint64_t fail_value = LLDB_INVALID_ADDRESS;
+  uint32_t svg_reg_num = reg_info->kinds[eRegisterKindLLDB];
+  uint64_t svg_reg_value = ReadRegisterAsUnsigned(svg_reg_num, fail_value);
+
+  if (svg_reg_value == LLDB_INVALID_ADDRESS || svg_reg_value > 32)
+    return;
+
+  m_reg_info_sp->UpdateARM64SMERegistersInfos(svg_reg_value);
+  // Make a heap based buffer that is big enough to store all registers
+  m_reg_data.SetData(std::make_shared<DataBufferHeap>(
+      m_reg_info_sp->GetRegisterDataByteSize(), 0));
+  m_reg_data.SetByteOrder(GetByteOrder());
 }
 
 void GDBRemoteDynamicRegisterInfo::UpdateARM64SVERegistersInfos(uint64_t vg) {
@@ -815,3 +834,15 @@
   // Re-calculate register offsets
   ConfigureOffsets();
 }
+
+void GDBRemoteDynamicRegisterInfo::UpdateARM64SMERegistersInfos(uint64_t svg) {
+  for (auto &reg : m_regs) {
+    if (strcmp(reg.name, "za") == 0) {
+      // ZA is a register with size (svg*8) * (svg*8). A square essentially.
+      reg.byte_size = (svg * 8) * (svg * 8);
+    }
+    reg.byte_offset = LLDB_INVALID_INDEX32;
+  }
+
+  ConfigureOffsets();
+}
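
As a rough illustration of the invariant AArch64SMEReconfigure and
UpdateARM64SMERegistersInfos maintain, the lldb Python API (using only calls
the tests in this patch already exercise, plus SBValue's GetByteSize) can
confirm that za's reported size tracks svg after a stop. A sketch, assuming
an lldb script interpreter session stopped on an SME-capable target:

# Illustrative sketch: after a stop, za should report (svg * 8) squared bytes,
# matching UpdateARM64SMERegistersInfos above.
import lldb
process = lldb.debugger.GetSelectedTarget().GetProcess()
regs = process.GetThreadAtIndex(0).GetFrameAtIndex(0).GetRegisters()
sme = regs.GetFirstValueByName("Scalable Matrix Extension Registers")
svg = sme.GetChildMemberWithName("svg").GetValueAsUnsigned()
za_set = regs.GetFirstValueByName("Scalable Matrix Array Storage Registers")
za = za_set.GetChildMemberWithName("za")
assert za.GetByteSize() == (svg * 8) * (svg * 8)
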
Index: lldb/source/Plugins/Process/elf-core/RegisterUtilities.h
===================================================================
--- lldb/source/Plugins/Process/elf-core/RegisterUtilities.h
+++ lldb/source/Plugins/Process/elf-core/RegisterUtilities.h
@@ -119,6 +119,10 @@
     {llvm::Triple::Linux, llvm::Triple::aarch64, llvm::ELF::NT_ARM_SVE},
 };
 
+constexpr RegsetDesc AARCH64_ZA_Desc[] = {
+    {llvm::Triple::Linux, llvm::Triple::aarch64, llvm::ELF::NT_ARM_ZA},
+};
+
 constexpr RegsetDesc AARCH64_PAC_Desc[] = {
     {llvm::Triple::Linux, llvm::Triple::aarch64, llvm::ELF::NT_ARM_PAC_MASK},
 };
Index: lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
===================================================================
--- lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
+++ lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
@@ -30,6 +30,7 @@
     eRegsetMaskPAuth = 4,
     eRegsetMaskMTE = 8,
     eRegsetMaskTLS = 16,
+    eRegsetMaskZA = 32,
     eRegsetMaskDynamic = ~1,
   };
 
@@ -106,8 +107,14 @@
 
   void AddRegSetTLS(bool has_tpidr2);
 
+  void AddRegSetZA();
+
+  void AddRegSetSME();
+
   uint32_t ConfigureVectorLength(uint32_t sve_vq);
 
+  void ConfigureVectorLengthZA(uint32_t za_vq);
+
   bool VectorSizeIsValid(uint32_t vq) {
     // coverity[unsigned_compare]
     if (vq >= eVectorQuadwordAArch64 && vq <= eVectorQuadwordAArch64SVEMax)
@@ -117,6 +124,7 @@
 
   bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); }
   bool IsSSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSSVE); }
+  bool IsZAEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskZA); }
   bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); }
   bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); }
   bool IsTLSEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskTLS); }
@@ -128,15 +136,20 @@
   bool IsPAuthReg(unsigned reg) const;
   bool IsMTEReg(unsigned reg) const;
   bool IsTLSReg(unsigned reg) const;
+  bool IsZAReg(unsigned reg) const;
+  bool IsSMEReg(unsigned reg) const;
 
   uint32_t GetRegNumSVEZ0() const;
   uint32_t GetRegNumSVEFFR() const;
   uint32_t GetRegNumFPCR() const;
   uint32_t GetRegNumFPSR() const;
   uint32_t GetRegNumSVEVG() const;
+  uint32_t GetRegNumSMEVG() const;
   uint32_t GetPAuthOffset() const;
   uint32_t GetMTEOffset() const;
   uint32_t GetTLSOffset() const;
+  uint32_t GetZAOffset() const;
+  uint32_t GetSMEOffset() const;
 
 private:
   typedef std::map<uint32_t, std::vector<lldb_private::RegisterInfo>>
@@ -145,7 +158,10 @@
   per_vq_register_infos m_per_vq_reg_infos;
 
   uint32_t m_vector_reg_vq = eVectorQuadwordAArch64;
+  uint32_t m_za_reg_vq = eVectorQuadwordAArch64;
 
+  // In normal operation this is const. Only when SVE or SME registers change
+  // size is it either replaced or the content modified.
   const lldb_private::RegisterInfo *m_register_info_p;
   uint32_t m_register_info_count;
 
@@ -164,6 +180,8 @@
   std::vector<uint32_t> pauth_regnum_collection;
   std::vector<uint32_t> m_mte_regnum_collection;
   std::vector<uint32_t> m_tls_regnum_collection;
+  std::vector<uint32_t> m_za_regnum_collection;
+  std::vector<uint32_t> m_sme_regnum_collection;
 };
 
 #endif
Index: lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
===================================================================
--- lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
+++ lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.cpp
@@ -83,6 +83,14 @@
     // Only present when SME is present
     DEFINE_EXTENSION_REG(tpidr2)};
 
+static lldb_private::RegisterInfo g_register_infos_za[] =
+    // 16 is a default size we will change later.
+    {{"za", nullptr, 16, 0, lldb::eEncodingVector, lldb::eFormatVectorOfUInt8,
+      KIND_ALL_INVALID, nullptr, nullptr, nullptr}};
+
+static lldb_private::RegisterInfo g_register_infos_sme[] = {
+    DEFINE_EXTENSION_REG(svg)};
+
 // Number of register sets provided by this context.
 enum {
   k_num_gpr_registers = gpr_w28 - gpr_x0 + 1,
@@ -91,6 +99,8 @@
   k_num_mte_register = 1,
   // Number of TLS registers is dynamic so it is not listed here.
   k_num_pauth_register = 2,
+  k_num_za_register = 1,
+  k_num_sme_register = 1,
   k_num_register_sets_default = 2,
   k_num_register_sets = 3
 };
@@ -197,6 +207,13 @@
 
 // The size of the TLS set is dynamic, so not listed here.
 
+static const lldb_private::RegisterSet g_reg_set_za_arm64 = {
+    "Scalable Matrix Array Storage Registers", "za", k_num_za_register,
+    nullptr};
+
+static const lldb_private::RegisterSet g_reg_set_sme_arm64 = {
+    "Scalable Matrix Extension Registers", "sme", k_num_sme_register, nullptr};
+
 RegisterInfoPOSIX_arm64::RegisterInfoPOSIX_arm64(
     const lldb_private::ArchSpec &target_arch, lldb_private::Flags opt_regsets)
     : lldb_private::RegisterInfoAndSetInterface(target_arch),
@@ -241,6 +258,11 @@
       // present.
       AddRegSetTLS(m_opt_regsets.AllSet(eRegsetMaskSSVE));
 
+      if (m_opt_regsets.AnySet(eRegsetMaskSSVE)) {
+        AddRegSetZA();
+        AddRegSetSME();
+      }
+
       m_register_info_count = m_dynamic_reg_infos.size();
       m_register_info_p = m_dynamic_reg_infos.data();
       m_register_set_p = m_dynamic_reg_sets.data();
@@ -344,6 +366,40 @@
   m_dynamic_reg_sets.back().registers = m_tls_regnum_collection.data();
 }
 
+void RegisterInfoPOSIX_arm64::AddRegSetZA() {
+  uint32_t za_regnum = m_dynamic_reg_infos.size();
+  m_za_regnum_collection.push_back(za_regnum);
+
+  m_dynamic_reg_infos.push_back(g_register_infos_za[0]);
+  m_dynamic_reg_infos[za_regnum].byte_offset =
+      m_dynamic_reg_infos[za_regnum - 1].byte_offset +
+      m_dynamic_reg_infos[za_regnum - 1].byte_size;
+  m_dynamic_reg_infos[za_regnum].kinds[lldb::eRegisterKindLLDB] = za_regnum;
+
+  m_per_regset_regnum_range[m_register_set_count] =
+      std::make_pair(za_regnum, za_regnum + 1);
+  m_dynamic_reg_sets.push_back(g_reg_set_za_arm64);
+  m_dynamic_reg_sets.back().registers = m_za_regnum_collection.data();
+}
+
+void RegisterInfoPOSIX_arm64::AddRegSetSME() {
+  uint32_t sme_regnum = m_dynamic_reg_infos.size();
+  for (uint32_t i = 0; i < k_num_sme_register; i++) {
+    m_sme_regnum_collection.push_back(sme_regnum + i);
+    m_dynamic_reg_infos.push_back(g_register_infos_sme[i]);
+    m_dynamic_reg_infos[sme_regnum + i].byte_offset =
+        m_dynamic_reg_infos[sme_regnum + i - 1].byte_offset +
+        m_dynamic_reg_infos[sme_regnum + i - 1].byte_size;
+    m_dynamic_reg_infos[sme_regnum + i].kinds[lldb::eRegisterKindLLDB] =
+        sme_regnum + i;
+  }
+
+  m_per_regset_regnum_range[m_register_set_count] =
+      std::make_pair(sme_regnum, m_dynamic_reg_infos.size());
+  m_dynamic_reg_sets.push_back(g_reg_set_sme_arm64);
+  m_dynamic_reg_sets.back().registers = m_sme_regnum_collection.data();
+}
+
 uint32_t RegisterInfoPOSIX_arm64::ConfigureVectorLength(uint32_t sve_vq) {
   // sve_vq contains SVE Quad vector length in context of AArch64 SVE.
   // SVE register infos if enabled cannot be disabled by selecting sve_vq = 0.
@@ -408,6 +464,20 @@
   return m_vector_reg_vq;
 }
 
+void RegisterInfoPOSIX_arm64::ConfigureVectorLengthZA(uint32_t za_vq) {
+  if (!VectorSizeIsValid(za_vq) || m_za_reg_vq == za_vq)
+    return;
+
+  m_za_reg_vq = za_vq;
+
+  // For SVE changes, we replace m_register_info_p completely. ZA is in a
+  // dynamic set and is just one register, so we cast away const here instead.
+  lldb_private::RegisterInfo *non_const_reginfo =
+      const_cast<lldb_private::RegisterInfo *>(m_register_info_p);
+  non_const_reginfo[m_za_regnum_collection[0]].byte_size =
+      (za_vq * 16) * (za_vq * 16);
+}
+
 bool RegisterInfoPOSIX_arm64::IsSVEReg(unsigned reg) const {
   if (m_vector_reg_vq > eVectorQuadwordAArch64)
     return (sve_vg <= reg && reg <= sve_ffr);
@@ -439,6 +509,14 @@
   return llvm::is_contained(m_tls_regnum_collection, reg);
 }
 
+bool RegisterInfoPOSIX_arm64::IsZAReg(unsigned reg) const {
+  return llvm::is_contained(m_za_regnum_collection, reg);
+}
+
+bool RegisterInfoPOSIX_arm64::IsSMEReg(unsigned reg) const {
+  return llvm::is_contained(m_sme_regnum_collection, reg);
+}
+
 uint32_t RegisterInfoPOSIX_arm64::GetRegNumSVEZ0() const { return sve_z0; }
 
 uint32_t RegisterInfoPOSIX_arm64::GetRegNumSVEFFR() const { return sve_ffr; }
@@ -449,6 +527,10 @@
 
 uint32_t RegisterInfoPOSIX_arm64::GetRegNumSVEVG() const { return sve_vg; }
 
+uint32_t RegisterInfoPOSIX_arm64::GetRegNumSMEVG() const {
+  return m_sme_regnum_collection[0];
+}
+
 uint32_t RegisterInfoPOSIX_arm64::GetPAuthOffset() const {
   return m_register_info_p[pauth_regnum_collection[0]].byte_offset;
 }
@@ -460,3 +542,11 @@
 uint32_t RegisterInfoPOSIX_arm64::GetTLSOffset() const {
   return m_register_info_p[m_tls_regnum_collection[0]].byte_offset;
 }
+
+uint32_t RegisterInfoPOSIX_arm64::GetZAOffset() const {
+  return m_register_info_p[m_za_regnum_collection[0]].byte_offset;
+}
+
+uint32_t RegisterInfoPOSIX_arm64::GetSMEOffset() const {
+  return m_register_info_p[m_sme_regnum_collection[0]].byte_offset;
+}
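
Note on the sizing above: ConfigureVectorLengthZA sets the ZA byte size to the
square of the streaming vector length in bytes, derived from the quadword
count. A standalone sketch of that arithmetic (illustrative only, not part of
the patch):

#include <cstdint>
#include <cstdio>

// za_vq is the streaming vector length in 128-bit quadwords, as used above.
static uint64_t ZAByteSize(uint32_t za_vq) {
  uint64_t svl_bytes = za_vq * 16; // streaming vector length in bytes
  return svl_bytes * svl_bytes;    // ZA is an SVL x SVL byte array
}

int main() {
  // vq=1 (128-bit SVL) -> 256 bytes, vq=4 -> 4096 bytes, vq=16 -> 65536 bytes.
  const uint32_t vqs[] = {1, 4, 16};
  for (uint32_t vq : vqs)
    std::printf("vq=%u -> ZA is %llu bytes\n", vq,
                (unsigned long long)ZAByteSize(vq));
  return 0;
}
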
Index: lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h
===================================================================
--- lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h
+++ lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h
@@ -54,6 +54,7 @@
   size_t GetFPUSize() { return sizeof(RegisterInfoPOSIX_arm64::FPU); }
 
   bool IsSVE(unsigned reg) const;
+  bool IsZA(unsigned reg) const;
   bool IsPAuth(unsigned reg) const;
   bool IsTLS(unsigned reg) const;
 
Index: lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp
===================================================================
--- lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp
+++ lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp
@@ -43,6 +43,10 @@
   return m_register_info_up->IsSVEReg(reg);
 }
 
+bool RegisterContextPOSIX_arm64::IsZA(unsigned reg) const {
+  return m_register_info_up->IsZAReg(reg);
+}
+
 bool RegisterContextPOSIX_arm64::IsPAuth(unsigned reg) const {
   return m_register_info_up->IsPAuthReg(reg);
 }
Index: lldb/source/Plugins/Process/Utility/LinuxPTraceDefines_arm64sve.h
===================================================================
--- lldb/source/Plugins/Process/Utility/LinuxPTraceDefines_arm64sve.h
+++ lldb/source/Plugins/Process/Utility/LinuxPTraceDefines_arm64sve.h
@@ -152,6 +152,8 @@
   uint16_t reserved;
 };
 
+using user_za_header = user_sve_header;
+
 /* Definitions for user_sve_header.flags: */
 const uint16_t ptrace_regs_mask = 1 << 0;
 const uint16_t ptrace_regs_fpsimd = 0;
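
The user_za_header alias works because the NT_ARM_ZA header has the same
layout as the SVE header. For context, this is roughly how a tracer probes it
with plain ptrace; ProbeZA and ZAHeader are placeholder names, not lldb or
kernel API, and it is a sketch rather than the path the patch takes (which
goes through PtraceWrapper), so there is no driver here:

#include <cstdint>
#include <cstring>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

#ifndef NT_ARM_ZA
#define NT_ARM_ZA 0x40c
#endif

// Same field layout as sve::user_za_header above (16 bytes).
struct ZAHeader {
  uint32_t size;     // header size, plus the ZA data size when ZA is active
  uint32_t max_size;
  uint16_t vl;       // streaming vector length in bytes
  uint16_t max_vl;
  uint16_t flags;
  uint16_t reserved;
};

// Probe NT_ARM_ZA for a thread that is already stopped under ptrace. Returns
// false if the kernel does not expose the regset. On success, *active says
// whether ZA currently holds data (the kernel returned more than a header).
static bool ProbeZA(pid_t tid, ZAHeader *hdr, bool *active) {
  std::memset(hdr, 0, sizeof(*hdr));
  struct iovec iov = {hdr, sizeof(*hdr)};
  if (ptrace(PTRACE_GETREGSET, tid, (void *)NT_ARM_ZA, &iov) != 0)
    return false;
  *active = hdr->size > sizeof(*hdr);
  return true;
}
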
Index: lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
===================================================================
--- lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
+++ lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h
@@ -85,6 +85,8 @@
   bool m_mte_ctrl_is_valid;
 
   bool m_sve_header_is_valid;
+  bool m_za_buffer_is_valid;
+  bool m_za_header_is_valid;
   bool m_pac_mask_is_valid;
   bool m_tls_is_valid;
   size_t m_tls_size;
@@ -98,6 +100,9 @@
   struct sve::user_sve_header m_sve_header;
   std::vector<uint8_t> m_sve_ptrace_payload;
 
+  sve::user_za_header m_za_header;
+  std::vector<uint8_t> m_za_ptrace_payload;
+
   bool m_refresh_hwdebug_info;
 
   struct user_pac_mask {
@@ -109,6 +114,12 @@
 
   uint64_t m_mte_ctrl_reg;
 
+  struct sme_regs {
+    uint64_t svg_reg;
+  };
+
+  struct sme_regs m_sme_regs;
+
   struct tls_regs {
     uint64_t tpidr_reg;
     // Only valid when SME is present.
@@ -139,10 +150,24 @@
 
   Status WriteTLS();
 
+  Status ReadSMESVG();
+
+  Status ReadZAHeader();
+
+  Status ReadZA();
+
+  Status WriteZA();
+
+  // There is no WriteZAHeader because writing only the header will disable ZA.
+  // Instead use WriteZA, and if you wish to disable ZA, make sure the ZA
+  // buffer has been resized to just the header size beforehand.
+
   bool IsSVE(unsigned reg) const;
+  bool IsZA(unsigned reg) const;
   bool IsPAuth(unsigned reg) const;
   bool IsMTE(unsigned reg) const;
   bool IsTLS(unsigned reg) const;
+  bool IsSME(unsigned reg) const;
 
   uint64_t GetSVERegVG() { return m_sve_header.vl / 8; }
 
@@ -150,12 +175,18 @@
 
   void *GetSVEHeader() { return &m_sve_header; }
 
+  void *GetZAHeader() { return &m_za_header; }
+
+  size_t GetZAHeaderSize() { return sizeof(m_za_header); }
+
   void *GetPACMask() { return &m_pac_mask; }
 
   void *GetMTEControl() { return &m_mte_ctrl_reg; }
 
   void *GetTLSBuffer() { return &m_tls_regs; }
 
+  void *GetSMEBuffer() { return &m_sme_regs; }
+
   void *GetSVEBuffer() { return m_sve_ptrace_payload.data(); }
 
   size_t GetSVEHeaderSize() { return sizeof(m_sve_header); }
@@ -166,10 +197,16 @@
 
   unsigned GetSVERegSet();
 
+  void *GetZABuffer() { return m_za_ptrace_payload.data(); };
+
+  size_t GetZABufferSize() { return m_za_ptrace_payload.size(); }
+
   size_t GetMTEControlSize() { return sizeof(m_mte_ctrl_reg); }
 
   size_t GetTLSBufferSize() { return m_tls_size; }
 
+  size_t GetSMEBufferSize() { return sizeof(m_sme_regs); }
+
   llvm::Error ReadHardwareDebugInfo() override;
 
   llvm::Error WriteHardwareDebugRegs(DREGType hwbType) override;
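
As the WriteZAHeader comment above explains, the kernel keys enabling and
disabling ZA off the length of the NT_ARM_ZA write: header-only disables ZA,
header plus VL x VL bytes of data enables it. A sketch of the two payload
shapes, using placeholder names rather than the accessors above:

#include <cstdint>
#include <cstring>
#include <vector>

// Same layout as sve::user_za_header (16 bytes); a placeholder type here.
struct ZAHeader {
  uint32_t size, max_size;
  uint16_t vl, max_vl, flags, reserved;
};

// Build an NT_ARM_ZA write payload. Writing just the header disables ZA;
// writing the header plus vl x vl bytes of data enables it with that content.
static std::vector<uint8_t> MakeZAPayload(uint16_t vl_bytes, bool enable) {
  size_t data_size = enable ? size_t(vl_bytes) * vl_bytes : 0;
  ZAHeader hdr = {};
  hdr.vl = vl_bytes;
  hdr.size = uint32_t(sizeof(hdr) + data_size);

  std::vector<uint8_t> payload(sizeof(hdr) + data_size, 0);
  std::memcpy(payload.data(), &hdr, sizeof(hdr));
  return payload;
}

int main() {
  auto enabled = MakeZAPayload(/*vl_bytes=*/32, /*enable=*/true);   // 16 + 1024
  auto disabled = MakeZAPayload(/*vl_bytes=*/32, /*enable=*/false); // header only
  return (enabled.size() == 1040 && disabled.size() == 16) ? 0 : 1;
}
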
Index: lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
===================================================================
--- lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
+++ lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp
@@ -41,6 +41,10 @@
   0x40b /* ARM Scalable Matrix Extension, Streaming SVE mode */
 #endif
 
+#ifndef NT_ARM_ZA
+#define NT_ARM_ZA 0x40c /* ARM Scalable Matrix Extension, Array Storage */
+#endif
+
 #ifndef NT_ARM_PAC_MASK
 #define NT_ARM_PAC_MASK 0x406 /* Pointer authentication code masks */
 #endif
@@ -90,6 +94,16 @@
         opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskSSVE);
     }
 
+    sve::user_za_header za_header;
+    ioVec.iov_base = &za_header;
+    ioVec.iov_len = sizeof(za_header);
+    regset = NT_ARM_ZA;
+    if (NativeProcessLinux::PtraceWrapper(PTRACE_GETREGSET,
+                                          native_thread.GetID(), &regset,
+                                          &ioVec, sizeof(za_header))
+            .Success())
+      opt_regsets.Set(RegisterInfoPOSIX_arm64::eRegsetMaskZA);
+
     NativeProcessLinux &process = native_thread.GetProcess();
 
     std::optional<uint64_t> auxv_at_hwcap =
@@ -133,6 +147,7 @@
   ::memset(&m_sve_header, 0, sizeof(m_sve_header));
   ::memset(&m_pac_mask, 0, sizeof(m_pac_mask));
   ::memset(&m_tls_regs, 0, sizeof(m_tls_regs));
+  ::memset(&m_sme_regs, 0, sizeof(m_sme_regs));
 
   m_mte_ctrl_reg = 0;
 
@@ -314,6 +329,39 @@
     offset = reg_info->byte_offset - GetRegisterInfo().GetMTEOffset();
     assert(offset < GetMTEControlSize());
     src = (uint8_t *)GetMTEControl() + offset;
+  } else if (IsZA(reg)) {
+    error = ReadZAHeader();
+    if (error.Fail())
+      return error;
+
+    // If there is only a header and no registers, ZA is inactive. Read as 0
+    // in this case.
+    if (m_za_header.size == sizeof(m_za_header)) {
+      // This will get reconfigured/reset later, so we are safe to use it.
+      // ZA is a square of VL * VL and the ptrace buffer also includes the
+      // header itself.
+      m_za_ptrace_payload.resize(((m_za_header.vl) * (m_za_header.vl)) +
+                                 GetZAHeaderSize());
+      std::fill(m_za_ptrace_payload.begin(), m_za_ptrace_payload.end(), 0);
+    } else {
+      // ZA is active, read the real register.
+      error = ReadZA();
+      if (error.Fail())
+        return error;
+    }
+
+    offset = reg_info->byte_offset - GetRegisterInfo().GetZAOffset() +
+             GetZAHeaderSize();
+    assert(offset < GetZABufferSize());
+    src = (uint8_t *)GetZABuffer() + offset;
+  } else if (IsSME(reg)) {
+    error = ReadSMESVG();
+    if (error.Fail())
+      return error;
+
+    offset = reg_info->byte_offset - GetRegisterInfo().GetSMEOffset();
+    assert(offset < GetSMEBufferSize());
+    src = (uint8_t *)GetSMEBuffer() + offset;
   } else
     return Status("failed - register wasn't recognized to be a GPR or an FPR, "
                   "write strategy unknown");
@@ -420,8 +468,12 @@
           SetSVERegVG(vg_value);
 
           error = WriteSVEHeader();
-          if (error.Success())
+          if (error.Success()) {
+            // Changing VG during streaming mode also changes the size of ZA.
+            if (m_sve_state == SVEState::Streaming)
+              m_za_header_is_valid = false;
             ConfigureRegisterContext();
+          }
 
           if (m_sve_header_is_valid && vg_value == GetSVERegVG())
             return error;
@@ -494,6 +546,23 @@
     ::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size);
 
     return WriteTLS();
+  } else if (IsZA(reg)) {
+    error = ReadZA();
+    if (error.Fail())
+      return error;
+
+    offset = reg_info->byte_offset - GetRegisterInfo().GetZAOffset() +
+             GetZAHeaderSize();
+    assert(offset < GetZABufferSize());
+    dst = (uint8_t *)GetZABuffer() + offset;
+    ::memcpy(dst, reg_value.GetBytes(), reg_info->byte_size);
+
+    // While this is writing a header that contains a vector length, the only
+    // way to change that is via the vg register. So here we assume the length
+    // will always be the current length and no reconfigure is needed.
+    return WriteZA();
+  } else if (IsSME(reg)) {
+    return Status("Writing to SVG is not supported.");
   }
 
   return Status("Failed to write register value");
@@ -503,8 +572,11 @@
   GPR,
   SVE, // Used for SVE and SSVE.
   FPR, // When there is no SVE, or SVE in FPSIMD mode.
+  // Pointer authentication registers are read only, so not included here.
   MTE,
   TLS,
+  ZA,
+  // SME pseudo registers are read only.
 };
 
 static uint8_t *AddSavedRegistersKind(uint8_t *dst, SavedRegistersKind kind) {
@@ -527,8 +599,9 @@
     lldb::WritableDataBufferSP &data_sp) {
   // AArch64 register data must contain GPRs and either FPR or SVE registers.
   // SVE registers can be non-streaming (aka SVE) or streaming (aka SSVE).
-  // Finally an optional MTE register. Pointer Authentication (PAC) registers
-  // are read-only and will be skipped.
+  // These are optionally followed by the MTE, TLS and ZA registers. SME pseudo
+  // registers are derived from other data, and Pointer Authentication (PAC)
+  // registers are read-only, so they are all skipped.
 
   // In order to create register data checkpoint we first read all register
   // values if not done already and calculate total size of register set data.
@@ -541,6 +614,22 @@
   if (error.Fail())
     return error;
 
+  // Here this means: does the system have ZA, not whether it is active.
+  if (GetRegisterInfo().IsZAEnabled()) {
+    error = ReadZAHeader();
+    if (error.Fail())
+      return error;
+    // Use header size here because the buffer may contain fake data when ZA is
+    // disabled.
+    reg_data_byte_size += sizeof(SavedRegistersKind) + m_za_header.size;
+    // For the same reason, we need to force it to be re-read so that it will
+    // always contain the real header.
+    m_za_buffer_is_valid = false;
+    error = ReadZA();
+    if (error.Fail())
+      return error;
+  }
+
   // If SVE is enabled we need not copy FPR separately.
   if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) {
     // Store mode and register data.
@@ -573,6 +662,45 @@
   dst = AddSavedRegisters(dst, SavedRegistersKind::GPR, GetGPRBuffer(),
                           GetGPRBufferSize());
 
+  // Streaming SVE and the ZA register both use the streaming vector length.
+  // When you change this, the kernel will invalidate parts of the process
+  // state. Therefore we need a specific order of restoration for each mode, if
+  // we also have ZA to restore.
+  //
+  // Streaming mode enabled, ZA enabled:
+  // * Write streaming registers. This sets SVCR.SM and clears SVCR.ZA.
+  // * Write ZA. This sets SVCR.ZA. The register data we provide is written
+  //   to ZA.
+  // * Result is SVCR.SM and SVCR.ZA set, with the expected data in both
+  //   register sets.
+  //
+  // Streaming mode disabled, ZA enabled:
+  // * Write ZA. This sets SVCR.ZA, and the ZA content. In the majority of cases
+  //   the streaming vector length is changing, so the thread is converted into
+  //   an FPSIMD thread if it is not already one. This also clears SVCR.SM.
+  // * Write SVE registers, which also clears SVCR.SM but most importantly, puts
+  //   us into full SVE mode instead of FPSIMD mode (where the registers are
+  //   actually the 128 bit Neon registers).
+  // * Result is we have SVCR.SM = 0, SVCR.ZA = 1 and the expected register
+  //   state.
+  //
+  // Restoring in a different order leads to problems like the SVE registers
+  // being truncated due to the FPSIMD mode, or ZA being disabled or filled
+  // with 0s (disabled and all-0s look the same from inside lldb since we fake
+  // the value when it is disabled).
+  //
+  // For more information on this, look up the uses of the relevant NT_ARM_
+  // constants and the functions vec_set_vector_length, sve_set_common and
+  // za_set in the Linux Kernel.
+
+  if ((m_sve_state != SVEState::Streaming) && GetRegisterInfo().IsZAEnabled()) {
+    // Use the header size not the buffer size, as we may be using the buffer
+    // for fake data, which we do not want to write out.
+    assert(m_za_header.size <= GetZABufferSize());
+    dst = AddSavedRegisters(dst, SavedRegistersKind::ZA, GetZABuffer(),
+                            m_za_header.size);
+  }
+
   if (GetRegisterInfo().IsSVEEnabled() || GetRegisterInfo().IsSSVEEnabled()) {
     dst = AddSavedRegistersKind(dst, SavedRegistersKind::SVE);
     *(reinterpret_cast<SVEState *>(dst)) = m_sve_state;
@@ -583,6 +711,12 @@
                             GetFPRSize());
   }
 
+  if ((m_sve_state == SVEState::Streaming) && GetRegisterInfo().IsZAEnabled()) {
+    assert(m_za_header.size <= GetZABufferSize());
+    dst = AddSavedRegisters(dst, SavedRegistersKind::ZA, GetZABuffer(),
+                            m_za_header.size);
+  }
+
   if (GetRegisterInfo().IsMTEEnabled()) {
     dst = AddSavedRegisters(dst, SavedRegistersKind::MTE, GetMTEControl(),
                             GetMTEControlSize());
@@ -672,13 +806,13 @@
         return error;
 
       // SVE header has been written configure SVE vector length if needed.
+      // This could change ZA data too, but that will be restored again later
+      // anyway.
       ConfigureRegisterContext();
 
       ::memcpy(GetSVEBuffer(), src, GetSVEBufferSize());
       m_sve_buffer_is_valid = true;
       error = WriteAllSVE();
-      if (error.Fail())
-        return error;
       src += GetSVEBufferSize();
 
       break;
@@ -708,10 +842,38 @@
       src += GetTLSBufferSize();
 
       break;
-    }
-  }
+    case SavedRegistersKind::ZA:
+      // To enable or disable ZA you write the regset with or without register
+      // data. The kernel detects this by looking at the ioVec's length, not
+      // the ZA header size you pass in. Therefore we must write the header and
+      // register data (if present) in one go every time. First read only the
+      // header, to get the size.
+      ::memcpy(GetZAHeader(), src, GetZAHeaderSize());
+      // Now copy the header and register data. The existing buffer size may
+      // be wrong because the buffer was previously sized for dummy data, so
+      // resize it first so that WriteZA uses the correct size.
+      m_za_ptrace_payload.resize(m_za_header.size);
+      ::memcpy(GetZABuffer(), src, GetZABufferSize());
+      m_za_buffer_is_valid = true;
+
+      error = WriteZA();
+      if (error.Fail())
+        return error;
 
+      // Update the size of ZA, which resizes the ptrace payload, potentially
+      // trashing our copy of the data we just wrote.
+      ConfigureRegisterContext();
 
+      // The ZA buffer now has the proper size, so read back the data we wrote
+      // above from ptrace.
+      error = ReadZA();
+      if (error.Fail())
+        return error;
+      src += GetZABufferSize();
+
+      break;
+    }
+  }
 
   return error;
 }
@@ -734,6 +896,10 @@
   return GetRegisterInfo().IsSVEReg(reg);
 }
 
+bool NativeRegisterContextLinux_arm64::IsZA(unsigned reg) const {
+  return GetRegisterInfo().IsZAReg(reg);
+}
+
 bool NativeRegisterContextLinux_arm64::IsPAuth(unsigned reg) const {
   return GetRegisterInfo().IsPAuthReg(reg);
 }
@@ -746,6 +912,10 @@
   return GetRegisterInfo().IsTLSReg(reg);
 }
 
+bool NativeRegisterContextLinux_arm64::IsSME(unsigned reg) const {
+  return GetRegisterInfo().IsSMEReg(reg);
+}
+
 llvm::Error NativeRegisterContextLinux_arm64::ReadHardwareDebugInfo() {
   if (!m_refresh_hwdebug_info) {
     return llvm::Error::success();
@@ -887,11 +1057,13 @@
   m_fpu_is_valid = false;
   m_sve_buffer_is_valid = false;
   m_sve_header_is_valid = false;
+  m_za_buffer_is_valid = false;
+  m_za_header_is_valid = false;
   m_pac_mask_is_valid = false;
   m_mte_ctrl_is_valid = false;
   m_tls_is_valid = false;
 
-  // Update SVE registers in case there is change in configuration.
+  // Update SVE and ZA registers in case there is a change in configuration.
   ConfigureRegisterContext();
 }
 
@@ -1057,6 +1229,62 @@
   return WriteRegisterSet(&ioVec, GetTLSBufferSize(), NT_ARM_TLS);
 }
 
+Status NativeRegisterContextLinux_arm64::ReadZAHeader() {
+  Status error;
+
+  if (m_za_header_is_valid)
+    return error;
+
+  struct iovec ioVec;
+  ioVec.iov_base = GetZAHeader();
+  ioVec.iov_len = GetZAHeaderSize();
+
+  error = ReadRegisterSet(&ioVec, GetZAHeaderSize(), NT_ARM_ZA);
+
+  if (error.Success())
+    m_za_header_is_valid = true;
+
+  return error;
+}
+
+Status NativeRegisterContextLinux_arm64::ReadZA() {
+  Status error;
+
+  if (m_za_buffer_is_valid)
+    return error;
+
+  struct iovec ioVec;
+  ioVec.iov_base = GetZABuffer();
+  ioVec.iov_len = GetZABufferSize();
+
+  error = ReadRegisterSet(&ioVec, GetZABufferSize(), NT_ARM_ZA);
+
+  if (error.Success())
+    m_za_buffer_is_valid = true;
+
+  return error;
+}
+
+Status NativeRegisterContextLinux_arm64::WriteZA() {
+  // Note that because the ZA ptrace payload contains the header also, this
+  // method will write both. This is done because writing only the header
+  // will disable ZA, even if .size in the header is correct for an enabled ZA.
+  Status error;
+
+  error = ReadZA();
+  if (error.Fail())
+    return error;
+
+  struct iovec ioVec;
+  ioVec.iov_base = GetZABuffer();
+  ioVec.iov_len = GetZABufferSize();
+
+  m_za_buffer_is_valid = false;
+  m_za_header_is_valid = false;
+
+  return WriteRegisterSet(&ioVec, GetZABufferSize(), NT_ARM_ZA);
+}
+
 void NativeRegisterContextLinux_arm64::ConfigureRegisterContext() {
   // ConfigureRegisterContext gets called from InvalidateAllRegisters
   // on every stop and configures SVE vector length and whether we are in
@@ -1106,6 +1334,19 @@
       m_sve_ptrace_payload.resize(sve::PTraceSize(vq, sve::ptrace_regs_sve));
     }
   }
+
+  if (!m_za_header_is_valid) {
+    Status error = ReadZAHeader();
+    if (error.Success()) {
+      uint32_t vq = RegisterInfoPOSIX_arm64::eVectorQuadwordAArch64SVE;
+      if (sve::vl_valid(m_za_header.vl))
+        vq = sve::vq_from_vl(m_za_header.vl);
+
+      GetRegisterInfo().ConfigureVectorLengthZA(vq);
+      m_za_ptrace_payload.resize(m_za_header.size);
+      m_za_buffer_is_valid = false;
+    }
+  }
 }
 
 uint32_t NativeRegisterContextLinux_arm64::CalculateFprOffset(
@@ -1131,12 +1372,27 @@
   return sve_reg_offset;
 }
 
+Status NativeRegisterContextLinux_arm64::ReadSMESVG() {
+  // This register is the streaming vector length, so we will get it from
+  // NT_ARM_ZA regardless of the current streaming mode.
+  Status error = ReadZAHeader();
+  if (error.Success())
+    m_sme_regs.svg_reg = m_za_header.vl / 8;
+
+  return error;
+}
+
 std::vector<uint32_t> NativeRegisterContextLinux_arm64::GetExpeditedRegisters(
     ExpeditedRegs expType) const {
   std::vector<uint32_t> expedited_reg_nums =
       NativeRegisterContext::GetExpeditedRegisters(expType);
+  // SVE, non-streaming vector length.
   if (m_sve_state == SVEState::FPSIMD || m_sve_state == SVEState::Full)
     expedited_reg_nums.push_back(GetRegisterInfo().GetRegNumSVEVG());
+  // SME, streaming vector length. This is used by the ZA register which is
+  // present even when streaming mode is not enabled.
+  if (GetRegisterInfo().IsSSVEEnabled())
+    expedited_reg_nums.push_back(GetRegisterInfo().GetRegNumSMEVG());
 
   return expedited_reg_nums;
 }
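
The ordering constraints in the WriteAllRegisterValues comment above reduce
to: outside streaming mode restore ZA before the SVE registers, inside
streaming mode restore the streaming registers before ZA. A sketch of just
that ordering, with placeholder helpers rather than the real regset writes:

#include <cstdio>

// Placeholder stand-ins for the real regset writes; illustration only.
static void RestoreSVE() { std::puts("write NT_ARM_SVE / NT_ARM_SSVE"); }
static void RestoreZA() { std::puts("write NT_ARM_ZA (header + data)"); }

enum class Mode { NonStreaming, Streaming };

// Mirrors the ordering documented above.
static void RestoreVectorState(Mode mode, bool has_za) {
  if (mode == Mode::NonStreaming) {
    if (has_za)
      RestoreZA(); // sets SVCR.ZA, may change the streaming vector length
    RestoreSVE();  // puts the thread back into full SVE mode, SVCR.SM stays 0
  } else {
    RestoreSVE();  // sets SVCR.SM, but clears SVCR.ZA
    if (has_za)
      RestoreZA(); // sets SVCR.ZA again and restores the array storage
  }
}

int main() {
  RestoreVectorState(Mode::Streaming, /*has_za=*/true);
  return 0;
}
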
Index: lldb/include/lldb/Utility/RegisterValue.h
===================================================================
--- lldb/include/lldb/Utility/RegisterValue.h
+++ lldb/include/lldb/Utility/RegisterValue.h
@@ -33,7 +33,9 @@
     // byte AArch64 SVE.
     kTypicalRegisterByteSize = 256u,
     // Anything else we'll heap allocate storage for it.
-    kMaxRegisterByteSize = kTypicalRegisterByteSize,
+    // 256x256 to support the AArch64 SME array storage (ZA) register, which
+    // is a square of the streaming vector length (at most 256 bytes).
+    kMaxRegisterByteSize = 256u * 256u,
   };
 
   typedef llvm::SmallVector<uint8_t, kTypicalRegisterByteSize> BytesContainer;
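
The new maximum corresponds to ZA at the architectural maximum streaming
vector length of 256 bytes (2048 bits); a quick check of the arithmetic:

#include <cstdint>

// 2048-bit (256 byte) streaming vector length is the architectural maximum.
constexpr uint32_t kMaxStreamingVLBytes = 256;
constexpr uint32_t kMaxZABytes = kMaxStreamingVLBytes * kMaxStreamingVLBytes;
static_assert(kMaxZABytes == 65536, "matches the new kMaxRegisterByteSize");

int main() { return 0; }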