https://github.com/topperc created 
https://github.com/llvm/llvm-project/pull/91514

Resolves #91513

>From f45df1cf1b74957e2f9609b982e964654f9af824 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.top...@sifive.com>
Date: Tue, 13 Feb 2024 16:17:50 -0800
Subject: [PATCH 1/3] [RISCV] Add canonical ISA string as Module metadata in
 IR. (#80760)

In an LTO build, we don't set the ELF attributes to indicate what
extensions were compiled with. The target CPU/Attrs in
RISCVTargetMachine do not get set for an LTO build. Each function gets a
target-cpu/feature attribute, but this isn't usable to set ELF attributs
since we wouldn't know what function to use. We can't just once since it
might have been compiler with an attribute likes target_verson.

This patch adds the ISA as Module metadata so we can retrieve it in the
backend. Individual translation units can still be compiled with
different strings so we need to collect the unique set when Modules are
merged.

The backend will need to combine the unique ISA strings to produce a
single value for the ELF attributes. This will be done in a separate
patch.
---
 clang/lib/CodeGen/CodeGenModule.cpp           |  14 +
 .../RISCV/ntlh-intrinsics/riscv32-zihintntl.c | 350 +++++++++---------
 .../test/CodeGen/RISCV/riscv-metadata-arch.c  |  20 +
 3 files changed, 209 insertions(+), 175 deletions(-)
 create mode 100644 clang/test/CodeGen/RISCV/riscv-metadata-arch.c

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 1280bcd36de94..f576cd8b853c2 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -67,6 +67,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/RISCVISAInfo.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/xxhash.h"
 #include "llvm/TargetParser/Triple.h"
@@ -1059,6 +1060,19 @@ void CodeGenModule::Release() {
     llvm::LLVMContext &Ctx = TheModule.getContext();
     getModule().addModuleFlag(llvm::Module::Error, "target-abi",
                               llvm::MDString::get(Ctx, ABIStr));
+
+    // Add the canonical ISA string as metadata so the backend can set the ELF
+    // attributes correctly. We use AppendUnique so LTO will keep all of the
+    // unique ISA strings that were linked together.
+    const std::vector<std::string> &Features =
+        getTarget().getTargetOpts().Features;
+    auto ParseResult =
+        llvm::RISCVISAInfo::parseFeatures(T.isRISCV64() ? 64 : 32, Features);
+    if (!errorToBool(ParseResult.takeError()))
+      getModule().addModuleFlag(
+          llvm::Module::AppendUnique, "riscv-isa",
+          llvm::MDNode::get(
+              Ctx, llvm::MDString::get(Ctx, (*ParseResult)->toString())));
   }
 
   if (CodeGenOpts.SanitizeCfiCrossDso) {
diff --git a/clang/test/CodeGen/RISCV/ntlh-intrinsics/riscv32-zihintntl.c 
b/clang/test/CodeGen/RISCV/ntlh-intrinsics/riscv32-zihintntl.c
index 897edbc6450af..b11c2ca010e7c 100644
--- a/clang/test/CodeGen/RISCV/ntlh-intrinsics/riscv32-zihintntl.c
+++ b/clang/test/CodeGen/RISCV/ntlh-intrinsics/riscv32-zihintntl.c
@@ -28,190 +28,190 @@ vint8m1_t *scvc1, *scvc2;
 
 // clang-format off
 void ntl_all_sizes() {                                       // CHECK-LABEL: 
ntl_all_sizes
-  uc = __riscv_ntl_load(&sc, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !5
-  sc = __riscv_ntl_load(&uc, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !5
-  us = __riscv_ntl_load(&ss, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5
-  ss = __riscv_ntl_load(&us, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5
-  ui = __riscv_ntl_load(&si, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5
-  si = __riscv_ntl_load(&ui, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5
-  ull = __riscv_ntl_load(&sll, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5
-  sll = __riscv_ntl_load(&ull, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5
-  h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5
-  f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5
-  d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5
-  v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain 
!5
-  v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain 
!5
-  v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain 
!5
-  *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <vscale x 2 x i32>{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !5
-  *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <vscale x 4 x i16>{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !5
-  *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <vscale x 8 x i8>{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !5
+  uc = __riscv_ntl_load(&sc, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !7
+  sc = __riscv_ntl_load(&uc, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !7
+  us = __riscv_ntl_load(&ss, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7
+  ss = __riscv_ntl_load(&us, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7
+  ui = __riscv_ntl_load(&si, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7
+  si = __riscv_ntl_load(&ui, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7
+  ull = __riscv_ntl_load(&sll, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7
+  sll = __riscv_ntl_load(&ull, __RISCV_NTLH_INNERMOST_PRIVATE); // CHECK: load 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7
+  h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7
+  f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7
+  d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: load 
double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7
+  v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain 
!7
+  v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain 
!7
+  v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain 
!7
+  *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <vscale x 2 x i32>{{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !7
+  *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <vscale x 4 x i16>{{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !7
+  *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_INNERMOST_PRIVATE);   // 
CHECK: load <vscale x 8 x i8>{{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !7
 
-  uc = __riscv_ntl_load(&sc, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !6
-  sc = __riscv_ntl_load(&uc, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !6
-  us = __riscv_ntl_load(&ss, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !6
-  ss = __riscv_ntl_load(&us, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !6
-  ui = __riscv_ntl_load(&si, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6
-  si = __riscv_ntl_load(&ui, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6
-  ull = __riscv_ntl_load(&sll, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
-  sll = __riscv_ntl_load(&ull, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
-  h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !6
-  f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6
-  d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
-  v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
<4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6
-  v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
<8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6
-  v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: 
load <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6
-  *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
<vscale x 2 x i32>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
-  *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
<vscale x 4 x i16>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
-  *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
<vscale x 8 x i8>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
+  uc = __riscv_ntl_load(&sc, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !8
+  sc = __riscv_ntl_load(&uc, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !8
+  us = __riscv_ntl_load(&ss, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8
+  ss = __riscv_ntl_load(&us, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8
+  ui = __riscv_ntl_load(&si, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8
+  si = __riscv_ntl_load(&ui, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8
+  ull = __riscv_ntl_load(&sll, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
+  sll = __riscv_ntl_load(&ull, __RISCV_NTLH_ALL_PRIVATE); // CHECK: load 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
+  h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8
+  f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8
+  d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
+  v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
<4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8
+  v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
<8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8
+  v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: 
load <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8
+  *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
<vscale x 2 x i32>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
+  *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
<vscale x 4 x i16>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
+  *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: load 
<vscale x 8 x i8>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
 
-  uc = __riscv_ntl_load(&sc, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !7
-  sc = __riscv_ntl_load(&uc, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !7
-  us = __riscv_ntl_load(&ss, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7
-  ss = __riscv_ntl_load(&us, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7
-  ui = __riscv_ntl_load(&si, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7
-  si = __riscv_ntl_load(&ui, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7
-  ull = __riscv_ntl_load(&sll, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7
-  sll = __riscv_ntl_load(&ull, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7
-  h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7
-  f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7
-  d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7
-  v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: 
load <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !7
-  v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: 
load <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !7
-  v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_INNERMOST_SHARED);   // 
CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain 
!7
-  *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: 
load <vscale x 2 x i32>{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !7
-  *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: 
load <vscale x 4 x i16>{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !7
-  *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: 
load <vscale x 8 x i8>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain 
!7
+  uc = __riscv_ntl_load(&sc, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !9
+  sc = __riscv_ntl_load(&uc, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !9
+  us = __riscv_ntl_load(&ss, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9
+  ss = __riscv_ntl_load(&us, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9
+  ui = __riscv_ntl_load(&si, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9
+  si = __riscv_ntl_load(&ui, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9
+  ull = __riscv_ntl_load(&sll, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9
+  sll = __riscv_ntl_load(&ull, __RISCV_NTLH_INNERMOST_SHARED); // CHECK: load 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9
+  h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9
+  f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9
+  d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: load 
double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9
+  v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: 
load <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !9
+  v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: 
load <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !9
+  v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_INNERMOST_SHARED);   // 
CHECK: load <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain 
!9
+  *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: 
load <vscale x 2 x i32>{{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !9
+  *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: 
load <vscale x 4 x i16>{{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !9
+  *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: 
load <vscale x 8 x i8>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain 
!9
 
-  uc = __riscv_ntl_load(&sc, __RISCV_NTLH_ALL);   // CHECK: load i8{{.*}}align 
1, !nontemporal !4, !riscv-nontemporal-domain !8
-  sc = __riscv_ntl_load(&uc, __RISCV_NTLH_ALL);   // CHECK: load i8{{.*}}align 
1, !nontemporal !4, !riscv-nontemporal-domain !8
-  us = __riscv_ntl_load(&ss, __RISCV_NTLH_ALL);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8
-  ss = __riscv_ntl_load(&us, __RISCV_NTLH_ALL);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8
-  ui = __riscv_ntl_load(&si, __RISCV_NTLH_ALL);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8
-  si = __riscv_ntl_load(&ui, __RISCV_NTLH_ALL);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8
-  ull = __riscv_ntl_load(&sll, __RISCV_NTLH_ALL); // CHECK: load 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  sll = __riscv_ntl_load(&ull, __RISCV_NTLH_ALL); // CHECK: load 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_ALL);   // CHECK: load 
half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8
-  f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_ALL);   // CHECK: load 
float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8
-  d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_ALL);   // CHECK: load 
double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_ALL);   // CHECK: load <4 x 
i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8
-  v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_ALL);   // CHECK: load <8 x 
i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8
-  v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_ALL);   // CHECK: load <16 x 
i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8
-  *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_ALL);   // CHECK: load <vscale 
x 2 x i32>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_ALL);   // CHECK: load <vscale 
x 4 x i16>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_ALL);   // CHECK: load <vscale 
x 8 x i8>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
+  uc = __riscv_ntl_load(&sc, __RISCV_NTLH_ALL);   // CHECK: load i8{{.*}}align 
1, !nontemporal !6, !riscv-nontemporal-domain !10
+  sc = __riscv_ntl_load(&uc, __RISCV_NTLH_ALL);   // CHECK: load i8{{.*}}align 
1, !nontemporal !6, !riscv-nontemporal-domain !10
+  us = __riscv_ntl_load(&ss, __RISCV_NTLH_ALL);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10
+  ss = __riscv_ntl_load(&us, __RISCV_NTLH_ALL);   // CHECK: load 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10
+  ui = __riscv_ntl_load(&si, __RISCV_NTLH_ALL);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10
+  si = __riscv_ntl_load(&ui, __RISCV_NTLH_ALL);   // CHECK: load 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10
+  ull = __riscv_ntl_load(&sll, __RISCV_NTLH_ALL); // CHECK: load 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  sll = __riscv_ntl_load(&ull, __RISCV_NTLH_ALL); // CHECK: load 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  h1 = __riscv_ntl_load(&h2, __RISCV_NTLH_ALL);   // CHECK: load 
half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10
+  f1 = __riscv_ntl_load(&f2, __RISCV_NTLH_ALL);   // CHECK: load 
float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10
+  d1 = __riscv_ntl_load(&d2, __RISCV_NTLH_ALL);   // CHECK: load 
double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  v4si1 = __riscv_ntl_load(&v4si2, __RISCV_NTLH_ALL);   // CHECK: load <4 x 
i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10
+  v8ss1 = __riscv_ntl_load(&v8ss2, __RISCV_NTLH_ALL);   // CHECK: load <8 x 
i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10
+  v16sc1 = __riscv_ntl_load(&v16sc2, __RISCV_NTLH_ALL);   // CHECK: load <16 x 
i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10
+  *scvi1 = __riscv_ntl_load(scvi2, __RISCV_NTLH_ALL);   // CHECK: load <vscale 
x 2 x i32>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  *scvs1 = __riscv_ntl_load(scvs2, __RISCV_NTLH_ALL);   // CHECK: load <vscale 
x 4 x i16>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  *scvc1 = __riscv_ntl_load(scvc2, __RISCV_NTLH_ALL);   // CHECK: load <vscale 
x 8 x i8>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
 
-  uc = __riscv_ntl_load(&sc);   // CHECK: load i8{{.*}}align 1, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  sc = __riscv_ntl_load(&uc);   // CHECK: load i8{{.*}}align 1, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  us = __riscv_ntl_load(&ss);   // CHECK: load i16{{.*}}align 2, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  ss = __riscv_ntl_load(&us);   // CHECK: load i16{{.*}}align 2, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  ui = __riscv_ntl_load(&si);   // CHECK: load i32{{.*}}align 4, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  si = __riscv_ntl_load(&ui);   // CHECK: load i32{{.*}}align 4, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  ull = __riscv_ntl_load(&sll); // CHECK: load i64{{.*}}align 8, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  sll = __riscv_ntl_load(&ull); // CHECK: load i64{{.*}}align 8, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  h1 = __riscv_ntl_load(&h2);   // CHECK: load half{{.*}}align 2, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  f1 = __riscv_ntl_load(&f2);   // CHECK: load float{{.*}}align 4, 
!nontemporal !4, !riscv-nontemporal-domain !8
-  d1 = __riscv_ntl_load(&d2);   // CHECK: load double{{.*}}align 8, 
!nontemporal !4, !riscv-nontemporal-domain !8
-  v4si1 = __riscv_ntl_load(&v4si2);   // CHECK: load <4 x i32>{{.*}}align 16, 
!nontemporal !4, !riscv-nontemporal-domain !8
-  v8ss1 = __riscv_ntl_load(&v8ss2);   // CHECK: load <8 x i16>{{.*}}align 16, 
!nontemporal !4, !riscv-nontemporal-domain !8
-  v16sc1 = __riscv_ntl_load(&v16sc2);   // CHECK: load <16 x i8>{{.*}}align 
16, !nontemporal !4, !riscv-nontemporal-domain !8
-  *scvi1 = __riscv_ntl_load(scvi2);   // CHECK: load <vscale x 2 x 
i32>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  *scvs1 = __riscv_ntl_load(scvs2);   // CHECK: load <vscale x 4 x 
i16>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  *scvc1 = __riscv_ntl_load(scvc2);   // CHECK: load <vscale x 8 x 
i8>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
+  uc = __riscv_ntl_load(&sc);   // CHECK: load i8{{.*}}align 1, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  sc = __riscv_ntl_load(&uc);   // CHECK: load i8{{.*}}align 1, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  us = __riscv_ntl_load(&ss);   // CHECK: load i16{{.*}}align 2, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  ss = __riscv_ntl_load(&us);   // CHECK: load i16{{.*}}align 2, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  ui = __riscv_ntl_load(&si);   // CHECK: load i32{{.*}}align 4, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  si = __riscv_ntl_load(&ui);   // CHECK: load i32{{.*}}align 4, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  ull = __riscv_ntl_load(&sll); // CHECK: load i64{{.*}}align 8, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  sll = __riscv_ntl_load(&ull); // CHECK: load i64{{.*}}align 8, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  h1 = __riscv_ntl_load(&h2);   // CHECK: load half{{.*}}align 2, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  f1 = __riscv_ntl_load(&f2);   // CHECK: load float{{.*}}align 4, 
!nontemporal !6, !riscv-nontemporal-domain !10
+  d1 = __riscv_ntl_load(&d2);   // CHECK: load double{{.*}}align 8, 
!nontemporal !6, !riscv-nontemporal-domain !10
+  v4si1 = __riscv_ntl_load(&v4si2);   // CHECK: load <4 x i32>{{.*}}align 16, 
!nontemporal !6, !riscv-nontemporal-domain !10
+  v8ss1 = __riscv_ntl_load(&v8ss2);   // CHECK: load <8 x i16>{{.*}}align 16, 
!nontemporal !6, !riscv-nontemporal-domain !10
+  v16sc1 = __riscv_ntl_load(&v16sc2);   // CHECK: load <16 x i8>{{.*}}align 
16, !nontemporal !6, !riscv-nontemporal-domain !10
+  *scvi1 = __riscv_ntl_load(scvi2);   // CHECK: load <vscale x 2 x 
i32>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  *scvs1 = __riscv_ntl_load(scvs2);   // CHECK: load <vscale x 4 x 
i16>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  *scvc1 = __riscv_ntl_load(scvc2);   // CHECK: load <vscale x 8 x 
i8>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
 
-  __riscv_ntl_store(&uc, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&sc, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&us, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&ss, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&ui, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&si, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&ull, 1, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: 
store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&sll, 1, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: 
store i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !5
-  __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_INNERMOST_PRIVATE);  // 
CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !4, 
!riscv-nontemporal-domain !5
-  __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store <vscale x 2 x i32>{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !5
-  __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store <vscale x 4 x i16>{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !5
-  __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store <vscale x 8 x i8>{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !5
+  __riscv_ntl_store(&uc, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&sc, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&us, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&ss, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&ui, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&si, 1, __RISCV_NTLH_INNERMOST_PRIVATE);    // CHECK: 
store i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&ull, 1, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: 
store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&sll, 1, __RISCV_NTLH_INNERMOST_PRIVATE);   // CHECK: 
store i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !7
+  __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_INNERMOST_PRIVATE);  // 
CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !6, 
!riscv-nontemporal-domain !7
+  __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store <vscale x 2 x i32>{{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !7
+  __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store <vscale x 4 x i16>{{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !7
+  __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_INNERMOST_PRIVATE);  // CHECK: 
store <vscale x 8 x i8>{{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !7
 
-  __riscv_ntl_store(&uc, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&sc, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&us, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&ss, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&ui, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&si, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&ull, 1, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&sll, 1, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
<4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
<8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: 
store <16 x i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
<vscale x 2 x i32>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
<vscale x 4 x i16>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
-  __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
<vscale x 8 x i8>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !6
+  __riscv_ntl_store(&uc, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&sc, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&us, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&ss, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&ui, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&si, 1, __RISCV_NTLH_ALL_PRIVATE);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&ull, 1, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&sll, 1, __RISCV_NTLH_ALL_PRIVATE);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
<4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
<8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: 
store <16 x i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
<vscale x 2 x i32>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
<vscale x 4 x i16>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_ALL_PRIVATE);  // CHECK: store 
<vscale x 8 x i8>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !8
 
-  __riscv_ntl_store(&uc, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&sc, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&us, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&ss, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&ui, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&si, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&ull, 1, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&sll, 1, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: store 
half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: store 
float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: store 
double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: 
store <4 x i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: 
store <8 x i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !7
-  __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_INNERMOST_SHARED);  // 
CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !4, 
!riscv-nontemporal-domain !7
-  __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: 
store <vscale x 2 x i32>{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !7
-  __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: 
store <vscale x 4 x i16>{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !7
-  __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: 
store <vscale x 8 x i8>{{.*}}align 8, !nontemporal !4, 
!riscv-nontemporal-domain !7
+  __riscv_ntl_store(&uc, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&sc, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&us, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&ss, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&ui, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&si, 1, __RISCV_NTLH_INNERMOST_SHARED);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&ull, 1, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&sll, 1, __RISCV_NTLH_INNERMOST_SHARED);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: store 
half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: store 
float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: store 
double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: 
store <4 x i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: 
store <8 x i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !9
+  __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_INNERMOST_SHARED);  // 
CHECK: store <16 x i8>{{.*}}align 16, !nontemporal !6, 
!riscv-nontemporal-domain !9
+  __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: 
store <vscale x 2 x i32>{{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !9
+  __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: 
store <vscale x 4 x i16>{{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !9
+  __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_INNERMOST_SHARED);  // CHECK: 
store <vscale x 8 x i8>{{.*}}align 8, !nontemporal !6, 
!riscv-nontemporal-domain !9
 
-  __riscv_ntl_store(&uc, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&sc, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&us, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&ss, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&ui, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&si, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&ull, 1, __RISCV_NTLH_ALL);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&sll, 1, __RISCV_NTLH_ALL);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_ALL);  // CHECK: store 
half{{.*}}align 2, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_ALL);  // CHECK: store 
float{{.*}}align 4, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_ALL);  // CHECK: store 
double{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_ALL);  // CHECK: store <4 x 
i32>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_ALL);  // CHECK: store <8 x 
i16>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_ALL);  // CHECK: store <16 x 
i8>{{.*}}align 16, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_ALL);  // CHECK: store <vscale 
x 2 x i32>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_ALL);  // CHECK: store <vscale 
x 4 x i16>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_ALL);  // CHECK: store <vscale 
x 8 x i8>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&uc, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&sc, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i8{{.*}}align 1, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&us, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&ss, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i16{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&ui, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&si, 1, __RISCV_NTLH_ALL);    // CHECK: store 
i32{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&ull, 1, __RISCV_NTLH_ALL);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&sll, 1, __RISCV_NTLH_ALL);   // CHECK: store 
i64{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&h1, 1.0, __RISCV_NTLH_ALL);  // CHECK: store 
half{{.*}}align 2, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&f1, 1.0, __RISCV_NTLH_ALL);  // CHECK: store 
float{{.*}}align 4, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&d1, 1.0, __RISCV_NTLH_ALL);  // CHECK: store 
double{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&v4si1, v4si2, __RISCV_NTLH_ALL);  // CHECK: store <4 x 
i32>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&v8ss1, v8ss2, __RISCV_NTLH_ALL);  // CHECK: store <8 x 
i16>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&v16sc1, v16sc2, __RISCV_NTLH_ALL);  // CHECK: store <16 x 
i8>{{.*}}align 16, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(scvi2, *scvi1, __RISCV_NTLH_ALL);  // CHECK: store <vscale 
x 2 x i32>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(scvs2, *scvs1, __RISCV_NTLH_ALL);  // CHECK: store <vscale 
x 4 x i16>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(scvc2, *scvc1, __RISCV_NTLH_ALL);  // CHECK: store <vscale 
x 8 x i8>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
 
-  __riscv_ntl_store(&uc, 1);    // CHECK: store i8{{.*}}align 1, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&sc, 1);    // CHECK: store i8{{.*}}align 1, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&us, 1);    // CHECK: store i16{{.*}}align 2, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&ss, 1);    // CHECK: store i16{{.*}}align 2, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&ui, 1);    // CHECK: store i32{{.*}}align 4, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&si, 1);    // CHECK: store i32{{.*}}align 4, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&ull, 1);   // CHECK: store i64{{.*}}align 8, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&sll, 1);   // CHECK: store i64{{.*}}align 8, !nontemporal 
!4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&h1, 1.0);  // CHECK: store half{{.*}}align 2, 
!nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&f1, 1.0);  // CHECK: store float{{.*}}align 4, 
!nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&d1, 1.0);  // CHECK: store double{{.*}}align 8, 
!nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&v4si1, v4si2);  // CHECK: store <4 x i32>{{.*}}align 16, 
!nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&v8ss1, v8ss2);  // CHECK: store <8 x i16>{{.*}}align 16, 
!nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(&v16sc1, v16sc2);  // CHECK: store <16 x i8>{{.*}}align 
16, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(scvi2, *scvi1);  // CHECK: store <vscale x 2 x 
i32>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(scvs2, *scvs1);  // CHECK: store <vscale x 4 x 
i16>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
-  __riscv_ntl_store(scvc2, *scvc1);  // CHECK: store <vscale x 8 x 
i8>{{.*}}align 8, !nontemporal !4, !riscv-nontemporal-domain !8
+  __riscv_ntl_store(&uc, 1);    // CHECK: store i8{{.*}}align 1, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&sc, 1);    // CHECK: store i8{{.*}}align 1, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&us, 1);    // CHECK: store i16{{.*}}align 2, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&ss, 1);    // CHECK: store i16{{.*}}align 2, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&ui, 1);    // CHECK: store i32{{.*}}align 4, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&si, 1);    // CHECK: store i32{{.*}}align 4, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&ull, 1);   // CHECK: store i64{{.*}}align 8, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&sll, 1);   // CHECK: store i64{{.*}}align 8, !nontemporal 
!6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&h1, 1.0);  // CHECK: store half{{.*}}align 2, 
!nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&f1, 1.0);  // CHECK: store float{{.*}}align 4, 
!nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&d1, 1.0);  // CHECK: store double{{.*}}align 8, 
!nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&v4si1, v4si2);  // CHECK: store <4 x i32>{{.*}}align 16, 
!nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&v8ss1, v8ss2);  // CHECK: store <8 x i16>{{.*}}align 16, 
!nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(&v16sc1, v16sc2);  // CHECK: store <16 x i8>{{.*}}align 
16, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(scvi2, *scvi1);  // CHECK: store <vscale x 2 x 
i32>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(scvs2, *scvs1);  // CHECK: store <vscale x 4 x 
i16>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
+  __riscv_ntl_store(scvc2, *scvc1);  // CHECK: store <vscale x 8 x 
i8>{{.*}}align 8, !nontemporal !6, !riscv-nontemporal-domain !10
 }
 // clang-format on
 
-// CHECK: !4 = !{i32 1}
-// CHECK: !5 = !{i32 2}
-// CHECK: !6 = !{i32 3}
-// CHECK: !7 = !{i32 4}
-// CHECK: !8 = !{i32 5}
+// CHECK: !6 = !{i32 1}
+// CHECK: !7 = !{i32 2}
+// CHECK: !8 = !{i32 3}
+// CHECK: !9 = !{i32 4}
+// CHECK: !10 = !{i32 5}
diff --git a/clang/test/CodeGen/RISCV/riscv-metadata-arch.c 
b/clang/test/CodeGen/RISCV/riscv-metadata-arch.c
new file mode 100644
index 0000000000000..060eda108b54e
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/riscv-metadata-arch.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple riscv32 -emit-llvm -o - %s \
+// RUN:   | FileCheck -check-prefix=RV32I %s
+// RUN: %clang_cc1 -triple riscv32 -target-feature +v -emit-llvm -o - %s \
+// RUN:   | FileCheck -check-prefix=RV32IV %s
+// RUN: %clang_cc1 -triple riscv64 -emit-llvm -o - %s \
+// RUN:   | FileCheck -check-prefix=RV64I %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -emit-llvm -o - %s \
+// RUN:   | FileCheck -check-prefix=RV64IV %s
+
+// RV32I:!{{[0-9]+}} = !{i32 6, !"riscv-isa", ![[ID:[0-9]+]]}
+// RV32I:![[ID]] = !{!"rv32i2p1"}
+
+// RV32IV:!{{[0-9]+}} = !{i32 6, !"riscv-isa", ![[ID:[0-9]+]]}
+// RV32IV:![[ID]] = 
!{!"rv32i2p1_f2p2_d2p2_v1p0_zicsr2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"}
+
+// RV64I:!{{[0-9]+}} = !{i32 6, !"riscv-isa", ![[ID:[0-9]+]]}
+// RV64I:![[ID]] = !{!"rv64i2p1"}
+
+// RV64IV:!{{[0-9]+}} = !{i32 6, !"riscv-isa", ![[ID:[0-9]+]]}
+// RV64IV:![[ID]] = 
!{!"rv64i2p1_f2p2_d2p2_v1p0_zicsr2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"}

>From f368dfc8152e7437c85a629177e66cebba667af2 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.top...@sifive.com>
Date: Thu, 29 Feb 2024 08:55:52 -0800
Subject: [PATCH 2/3] [RISCV] Store RVC and TSO ELF flags explicitly in
 RISCVTargetStreamer. NFCI (#83344)

Instead of caching STI in the RISCVELFTargetStreamer, store the two
flags we need from it.

My goal is to allow RISCVAsmPrinter to override these flags using IR
module metadata for LTO. So they need to be separated from the STI used
to construct the TargetStreamer.

This patch should be NFC as long as no one is changing the contents of
the STI that was used to construct the TargetStreamer between the
constructor and the use of the flags.
---
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp   | 8 ++++----
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h     | 1 -
 .../lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp | 5 +++++
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h  | 5 +++++
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 961b8f0afe225..cdf7c048a4bf1 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -31,12 +31,13 @@ using namespace llvm;
 // This part is for ELF object output.
 RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S,
                                                const MCSubtargetInfo &STI)
-    : RISCVTargetStreamer(S), CurrentVendor("riscv"), STI(STI) {
+    : RISCVTargetStreamer(S), CurrentVendor("riscv") {
   MCAssembler &MCA = getStreamer().getAssembler();
   const FeatureBitset &Features = STI.getFeatureBits();
   auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend());
   setTargetABI(RISCVABI::computeTargetABI(STI.getTargetTriple(), Features,
                                           
MAB.getTargetOptions().getABIName()));
+  setFlagsFromFeatures(STI);
   // `j label` in `.option norelax; j label; .option relax; ...; label:` needs 
a
   // relocation to ensure the jump target is correct after linking. This is due
   // to a limitation that shouldForceRelocation has to make the decision 
upfront
@@ -87,14 +88,13 @@ void RISCVTargetELFStreamer::finishAttributeSection() {
 void RISCVTargetELFStreamer::finish() {
   RISCVTargetStreamer::finish();
   MCAssembler &MCA = getStreamer().getAssembler();
-  const FeatureBitset &Features = STI.getFeatureBits();
   RISCVABI::ABI ABI = getTargetABI();
 
   unsigned EFlags = MCA.getELFHeaderEFlags();
 
-  if (Features[RISCV::FeatureStdExtC])
+  if (hasRVC())
     EFlags |= ELF::EF_RISCV_RVC;
-  if (Features[RISCV::FeatureStdExtZtso])
+  if (hasTSO())
     EFlags |= ELF::EF_RISCV_TSO;
 
   switch (ABI) {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index a6f54bf67b5d2..e8f29cd8449ba 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -46,7 +46,6 @@ class RISCVTargetELFStreamer : public RISCVTargetStreamer {
   StringRef CurrentVendor;
 
   MCSection *AttributeSection = nullptr;
-  const MCSubtargetInfo &STI;
 
   void emitAttribute(unsigned Attribute, unsigned Value) override;
   void emitTextAttribute(unsigned Attribute, StringRef String) override;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index ac4861bf113eb..eee78a8c161f0 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -48,6 +48,11 @@ void RISCVTargetStreamer::setTargetABI(RISCVABI::ABI ABI) {
   TargetABI = ABI;
 }
 
+void RISCVTargetStreamer::setFlagsFromFeatures(const MCSubtargetInfo &STI) {
+  HasRVC = STI.hasFeature(RISCV::FeatureStdExtC);
+  HasTSO = STI.hasFeature(RISCV::FeatureStdExtZtso);
+}
+
 void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI,
                                                bool EmitStackAlign) {
   if (EmitStackAlign) {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h 
b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
index 070e72fb157ae..cb8bc21cb6355 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
@@ -33,6 +33,8 @@ struct RISCVOptionArchArg {
 
 class RISCVTargetStreamer : public MCTargetStreamer {
   RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
+  bool HasRVC = false;
+  bool HasTSO = false;
 
 public:
   RISCVTargetStreamer(MCStreamer &S);
@@ -58,6 +60,9 @@ class RISCVTargetStreamer : public MCTargetStreamer {
   void emitTargetAttributes(const MCSubtargetInfo &STI, bool EmitStackAlign);
   void setTargetABI(RISCVABI::ABI ABI);
   RISCVABI::ABI getTargetABI() const { return TargetABI; }
+  void setFlagsFromFeatures(const MCSubtargetInfo &STI);
+  bool hasRVC() const { return HasRVC; }
+  bool hasTSO() const { return HasTSO; }
 };
 
 // This part is for ascii assembly output

>From b7787315eecc4adfde9d4f4af28ee549652a7cc3 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.top...@sifive.com>
Date: Wed, 20 Mar 2024 11:35:19 -0700
Subject: [PATCH 3/3] [RISCV] Use 'riscv-isa' module flag to set ELF flags and
 attributes. (#85155)

Walk all the ISA strings and set the subtarget bits for any extension we
find in any string.

This allows LTO output to have a ELF attributes from the union of all of
the files used to compile it.
---
 llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp     | 32 ++++++++++++++++---
 .../CodeGen/RISCV/attributes-module-flag.ll   | 17 ++++++++++
 llvm/test/CodeGen/RISCV/module-elf-flags.ll   | 13 ++++++++
 3 files changed, 58 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/attributes-module-flag.ll
 create mode 100644 llvm/test/CodeGen/RISCV/module-elf-flags.ll

diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp 
b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index b2e9cd87373b0..87bd9b4048cd1 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -100,7 +100,7 @@ class RISCVAsmPrinter : public AsmPrinter {
   bool emitDirectiveOptionArch();
 
 private:
-  void emitAttributes();
+  void emitAttributes(const MCSubtargetInfo &SubtargetInfo);
 
   void emitNTLHint(const MachineInstr *MI);
 
@@ -385,8 +385,32 @@ void RISCVAsmPrinter::emitStartOfAsmFile(Module &M) {
   if (const MDString *ModuleTargetABI =
           dyn_cast_or_null<MDString>(M.getModuleFlag("target-abi")))
     RTS.setTargetABI(RISCVABI::getTargetABI(ModuleTargetABI->getString()));
+
+  MCSubtargetInfo SubtargetInfo = *TM.getMCSubtargetInfo();
+
+  // Use module flag to update feature bits.
+  if (auto *MD = dyn_cast_or_null<MDNode>(M.getModuleFlag("riscv-isa"))) {
+    for (auto &ISA : MD->operands()) {
+      if (auto *ISAString = dyn_cast_or_null<MDString>(ISA)) {
+        auto ParseResult = llvm::RISCVISAInfo::parseArchString(
+            ISAString->getString(), /*EnableExperimentalExtension=*/true,
+            /*ExperimentalExtensionVersionCheck=*/true);
+        if (!errorToBool(ParseResult.takeError())) {
+          auto &ISAInfo = *ParseResult;
+          for (const auto &Feature : RISCVFeatureKV) {
+            if (ISAInfo->hasExtension(Feature.Key) &&
+                !SubtargetInfo.hasFeature(Feature.Value))
+              SubtargetInfo.ToggleFeature(Feature.Key);
+          }
+        }
+      }
+    }
+
+    RTS.setFlagsFromFeatures(SubtargetInfo);
+  }
+
   if (TM.getTargetTriple().isOSBinFormatELF())
-    emitAttributes();
+    emitAttributes(SubtargetInfo);
 }
 
 void RISCVAsmPrinter::emitEndOfAsmFile(Module &M) {
@@ -398,13 +422,13 @@ void RISCVAsmPrinter::emitEndOfAsmFile(Module &M) {
   EmitHwasanMemaccessSymbols(M);
 }
 
-void RISCVAsmPrinter::emitAttributes() {
+void RISCVAsmPrinter::emitAttributes(const MCSubtargetInfo &SubtargetInfo) {
   RISCVTargetStreamer &RTS =
       static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
   // Use MCSubtargetInfo from TargetMachine. Individual functions may have
   // attributes that differ from other functions in the module and we have no
   // way to know which function is correct.
-  RTS.emitTargetAttributes(*TM.getMCSubtargetInfo(), /*EmitStackAlign*/ true);
+  RTS.emitTargetAttributes(SubtargetInfo, /*EmitStackAlign*/ true);
 }
 
 void RISCVAsmPrinter::emitFunctionEntryLabel() {
diff --git a/llvm/test/CodeGen/RISCV/attributes-module-flag.ll 
b/llvm/test/CodeGen/RISCV/attributes-module-flag.ll
new file mode 100644
index 0000000000000..4580539fbb29b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/attributes-module-flag.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=riscv32 %s -o - | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 %s -o - | FileCheck %s --check-prefix=RV64
+
+; Test generation of ELF attribute from module metadata
+
+; RV32: .attribute 5, "rv32i2p1_m2p0_zba1p0"
+; RV64: .attribute 5, "rv64i2p1_m2p0_zba1p0"
+
+define i32 @addi(i32 %a) {
+  %1 = add i32 %a, 1
+  ret i32 %1
+}
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 6, !"riscv-isa", !1}
+!1 = !{!"rv64i2p1_m2p0", !"rv64i2p1_zba1p0"}
diff --git a/llvm/test/CodeGen/RISCV/module-elf-flags.ll 
b/llvm/test/CodeGen/RISCV/module-elf-flags.ll
new file mode 100644
index 0000000000000..1b4bc9fd5466c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/module-elf-flags.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=riscv32 -filetype=obj < %s | llvm-readelf -h - | FileCheck 
-check-prefixes=FLAGS %s
+
+; FLAGS: Flags: 0x11, RVC, TSO
+
+define i32 @addi(i32 %a) {
+  %1 = add i32 %a, 1
+  ret i32 %1
+}
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 6, !"riscv-isa", !1}
+!1 = !{!"rv64i2p1_c2p0_ztso0p1"}

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to