https://github.com/jacquesguan updated 
https://github.com/llvm/llvm-project/pull/197872

>From 4c482d410bcaee44a91e5c85cb07d2d22b5f262e Mon Sep 17 00:00:00 2001
From: Jianjian GUAN <[email protected]>
Date: Fri, 15 May 2026 10:37:40 +0800
Subject: [PATCH 1/3] [CIR] Add support for __builtin_nontemporal_store and
 __builtin_nontemporal_load

Add nontemporal attribute to cir.load and cir.store ops.
---
 .../CIR/Dialect/Builder/CIRBaseBuilder.h      | 16 ++--
 clang/include/clang/CIR/Dialect/IR/CIROps.td  |  4 +
 clang/include/clang/CIR/MissingFeatures.h     |  1 -
 clang/lib/CIR/CodeGen/CIRGenAtomic.cpp        |  1 +
 clang/lib/CIR/CodeGen/CIRGenBuilder.h         | 12 +--
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp       | 19 ++++-
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp          | 14 ++--
 clang/lib/CIR/CodeGen/CIRGenFunction.h        |  4 +-
 clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp    |  1 +
 .../CIR/Dialect/Transforms/EHABILowering.cpp  |  6 +-
 .../lib/CIR/Dialect/Transforms/FlattenCFG.cpp | 20 +++--
 .../TargetLowering/LowerItaniumCXXABI.cpp     |  5 ++
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 11 +--
 .../CodeGenBuiltins/builtin-nontemporal.cpp   | 77 +++++++++++++++++++
 14 files changed, 147 insertions(+), 44 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index f5222accff154..0758a0f2c14c7 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -222,11 +222,12 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
   }
 
   cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr,
-                         bool isVolatile = false, uint64_t alignment = 0) {
+                         bool isVolatile = false, uint64_t alignment = 0,
+                         bool isNontemporal = false) {
     mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment);
     return cir::LoadOp::create(*this, loc, ptr, /*isDeref=*/false, isVolatile,
-                               alignmentAttr, cir::SyncScopeKindAttr{},
-                               cir::MemOrderAttr{});
+                               isNontemporal, alignmentAttr,
+                               cir::SyncScopeKindAttr{}, cir::MemOrderAttr{});
   }
 
   mlir::Value createAlignedLoad(mlir::Location loc, mlir::Value ptr,
@@ -376,15 +377,15 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
   }
 
   cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst,
-                           bool isVolatile = false,
+                           bool isVolatile = false, bool isNontemporal = false,
                            mlir::IntegerAttr align = {},
                            cir::SyncScopeKindAttr scope = {},
                            cir::MemOrderAttr order = {}) {
     if (mlir::cast<cir::PointerType>(dst.getType()).getPointee() !=
         val.getType())
       dst = createPtrBitcast(dst, val.getType());
-    return cir::StoreOp::create(*this, loc, val, dst, isVolatile, align, scope,
-                                order);
+    return cir::StoreOp::create(*this, loc, val, dst, isVolatile, isNontemporal,
+                                align, scope, order);
   }
 
   /// Emit a load from an boolean flag variable.
@@ -422,7 +423,8 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment);
     auto addr = createAlloca(loc, getPointerTo(type), type, {}, alignmentAttr);
     return cir::LoadOp::create(*this, loc, addr, /*isDeref=*/false,
-                               /*isVolatile=*/false, alignmentAttr,
+                               /*isVolatile=*/false, /*nontemporal=*/false,
+                               alignmentAttr,
                                /*sync_scope=*/{}, /*mem_order=*/{});
   }
 
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 67ddaa73d9184..b95622f7a2b89 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -709,6 +709,7 @@ def CIR_LoadOp : CIR_Op<"load", [
                            [MemRead]>:$addr,
                        UnitAttr:$isDeref,
                        UnitAttr:$is_volatile,
+                       UnitAttr:$nontemporal,
                        OptionalAttr<I64Attr>:$alignment,
                        OptionalAttr<CIR_SyncScopeKind>:$sync_scope,
                        OptionalAttr<CIR_MemOrder>:$mem_order);
@@ -717,6 +718,7 @@ def CIR_LoadOp : CIR_Op<"load", [
   let assemblyFormat = [{
     (`deref` $isDeref^)?
     (`volatile` $is_volatile^)?
+    (`nontemporal` $nontemporal^)?
     (`align` `(` $alignment^ `)`)?
     (`syncscope` `(` $sync_scope^ `)`)?
     (`atomic` `(` $mem_order^ `)`)?
@@ -808,12 +810,14 @@ def CIR_StoreOp : CIR_Op<"store", [
                        Arg<CIR_PointerType, "the address to store the value",
                            [MemWrite]>:$addr,
                        UnitAttr:$is_volatile,
+                       UnitAttr:$nontemporal,
                        OptionalAttr<I64Attr>:$alignment,
                        OptionalAttr<CIR_SyncScopeKind>:$sync_scope,
                        OptionalAttr<CIR_MemOrder>:$mem_order);
 
   let assemblyFormat = [{
     (`volatile` $is_volatile^)?
+    (`nontemporal` $nontemporal^)?
     (`align` `(` $alignment^ `)`)?
     (`syncscope` `(` $sync_scope^ `)`)?
     (`atomic` `(` $mem_order^ `)`)?
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 56ee2f4101a99..31958cd078d4d 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -47,7 +47,6 @@ struct MissingFeatures {
 
   // Load/store attributes
   static bool opLoadEmitScalarRangeCheck() { return false; }
-  static bool opLoadStoreNontemporal() { return false; }
   static bool opLoadStoreTbaa() { return false; }
   static bool opLoadStoreAtomic() { return false; }
   static bool opLoadStoreObjC() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
index 3df0cd23d570e..bd6e2b685f767 100644
--- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp
@@ -630,6 +630,7 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest,
     assert(!cir::MissingFeatures::atomicSyncScopeID());
 
     builder.createStore(loc, loadVal1, ptr, expr->isVolatile(),
+                        /*isNontemporal=*/false,
                         /*align=*/mlir::IntegerAttr{}, scopeAttr, orderAttr);
     return;
   }
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index aeb1a122429e2..ae8198e191bfc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -626,10 +626,11 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   }
 
   cir::LoadOp createLoad(mlir::Location loc, Address addr,
-                         bool isVolatile = false) {
+                         bool isVolatile = false, bool isNontemporal = false) {
     mlir::IntegerAttr align = getAlignmentAttr(addr.getAlignment());
     return cir::LoadOp::create(*this, loc, addr.getPointer(), /*isDeref=*/false,
-                               isVolatile, /*alignment=*/align,
+                               isVolatile, isNontemporal,
+                               /*alignment=*/align,
                                /*sync_scope=*/cir::SyncScopeKindAttr{},
                                /*mem_order=*/cir::MemOrderAttr{});
   }
@@ -641,7 +642,8 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
     uint64_t alignment = align ? align->value() : 0;
     mlir::IntegerAttr alignAttr = getAlignmentAttr(alignment);
     return cir::LoadOp::create(*this, loc, ptr, /*isDeref=*/false,
-                               /*isVolatile=*/false, alignAttr,
+                               /*isVolatile=*/false, /*isNontemporal=*/false,
+                               alignAttr,
                                /*sync_scope=*/cir::SyncScopeKindAttr{},
                                /*mem_order=*/cir::MemOrderAttr{});
   }
@@ -653,14 +655,14 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   }
 
   cir::StoreOp createStore(mlir::Location loc, mlir::Value val, Address dst,
-                           bool isVolatile = false,
+                           bool isVolatile = false, bool isNontemporal = false,
                            mlir::IntegerAttr align = {},
                            cir::SyncScopeKindAttr scope = {},
                            cir::MemOrderAttr order = {}) {
     if (!align)
       align = getAlignmentAttr(dst.getAlignment());
     return CIRBaseBuilderTy::createStore(loc, val, dst.getPointer(), isVolatile,
-                                         align, scope, order);
+                                         isNontemporal, align, scope, order);
   }
 
   /// Create a cir.complex.real_ptr operation that derives a pointer to the real
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index afa7e5b91251b..dd48979eeb00a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -2090,8 +2090,23 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
   case Builtin::BI__sync_lock_release_8:
   case Builtin::BI__sync_lock_release_16:
   case Builtin::BI__sync_synchronize:
-  case Builtin::BI__builtin_nontemporal_load:
-  case Builtin::BI__builtin_nontemporal_store:
+    return errorBuiltinNYI(*this, e, builtinID);
+  case Builtin::BI__builtin_nontemporal_load: {
+    Address addr = emitPointerWithAlignment(e->getArg(0));
+    mlir::Value val = emitLoadOfScalar(
+        addr, /*isVolatile=*/false, e->getType(), e->getExprLoc(),
+        LValueBaseInfo(AlignmentSource::Type), /*isNontemporal=*/true);
+    return RValue::get(val);
+  }
+  case Builtin::BI__builtin_nontemporal_store: {
+    mlir::Value val = emitScalarExpr(e->getArg(0));
+    Address addr = emitPointerWithAlignment(e->getArg(1));
+    val = emitToMemory(val, e->getArg(0)->getType());
+    emitStoreOfScalar(val, addr, /*isVolatile=*/false, e->getArg(0)->getType(),
+                      LValueBaseInfo(AlignmentSource::Type), /*isInit=*/false,
+                      /*isNontemporal=*/true);
+    return RValue::get(nullptr);
+  }
   case Builtin::BI__c11_atomic_is_lock_free:
   case Builtin::BI__atomic_is_lock_free:
   case Builtin::BI__atomic_test_and_set:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 0f40516ee3537..f92ba41fcd146 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -499,12 +499,7 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr,
   }
 
   assert(currSrcLoc && "must pass in source location");
-  builder.createStore(*currSrcLoc, value, addr, isVolatile);
-
-  if (isNontemporal) {
-    cgm.errorNYI(addr.getPointer().getLoc(), "emitStoreOfScalar nontemporal");
-    return;
-  }
+  builder.createStore(*currSrcLoc, value, addr, isVolatile, isNontemporal);
 
   assert(!cir::MissingFeatures::opTBAA());
 }
@@ -741,7 +736,8 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue,
 
 mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
                                              QualType ty, SourceLocation loc,
-                                             LValueBaseInfo baseInfo) {
+                                             LValueBaseInfo baseInfo,
+                                             bool isNontemporal) {
   // Traditional LLVM codegen handles thread local separately, CIR handles
   // as part of getAddrOfGlobalVar (GetGlobalOp).
   mlir::Type eltTy = addr.getElementType();
@@ -771,7 +767,8 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
 
   assert(!cir::MissingFeatures::opLoadEmitScalarRangeCheck());
 
-  mlir::Value loadOp = builder.createLoad(getLoc(loc), addr, isVolatile);
+  mlir::Value loadOp =
+      builder.createLoad(getLoc(loc), addr, isVolatile, isNontemporal);
   if (!ty->isBooleanType() && ty->hasBooleanRepresentation())
     cgm.errorNYI("emitLoadOfScalar: boolean type with boolean representation");
 
@@ -780,7 +777,6 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
 
 mlir::Value CIRGenFunction::emitLoadOfScalar(LValue lvalue,
                                              SourceLocation loc) {
-  assert(!cir::MissingFeatures::opLoadStoreNontemporal());
   assert(!cir::MissingFeatures::opLoadStoreTbaa());
   return emitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
                           lvalue.getType(), loc, lvalue.getBaseInfo());
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 9f2facd12f417..cf71985310459 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1993,7 +1993,8 @@ class CIRGenFunction : public CIRGenTypeCache {
   /// l-value.
   mlir::Value emitLoadOfScalar(LValue lvalue, SourceLocation loc);
   mlir::Value emitLoadOfScalar(Address addr, bool isVolatile, QualType ty,
-                               SourceLocation loc, LValueBaseInfo baseInfo);
+                               SourceLocation loc, LValueBaseInfo baseInfo,
+                               bool isNontemporal = false);
 
   /// Emit code to compute a designator that specifies the location
   /// of the expression.
@@ -2203,6 +2204,7 @@ class CIRGenFunction : public CIRGenTypeCache {
       builder.restoreInsertionPoint(outermostConditional->getInsertPoint());
       builder.createStore(
           value.getLoc(), value, addr, /*isVolatile=*/false,
+          /*isNontemporal=*/false,
           mlir::IntegerAttr::get(
               mlir::IntegerType::get(value.getContext(), 64),
               (uint64_t)addr.getAlignment().getAsAlign().value()));
diff --git a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp
index 8a82bcb19454e..73b35c7f00c2d 100644
--- a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp
@@ -142,6 +142,7 @@ DeletionKind cir::CopyOp::removeBlockingUses(
   if (loadsFrom(slot))
     cir::StoreOp::create(builder, getLoc(), reachingDefinition, getDst(),
                          /*isVolatile=*/false,
+                         /*isNontemporal=*/false,
                          /*alignment=*/mlir::IntegerAttr{},
                          /*sync_scope=*/cir::SyncScopeKindAttr(),
                          /*mem-order=*/cir::MemOrderAttr());
diff --git a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp
index 802740e800d7f..b586a281ca91b 100644
--- a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp
@@ -750,7 +750,7 @@ void ItaniumEHLowering::lowerInitCatchParam(cir::InitCatchParamOp op) {
 
     mlir::Value casted = cir::CastOp::create(builder, loc, elementType,
                                              cir::CastKind::bitcast, exnPtr);
-    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {});
+    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {});
     break;
   }
   case InitCatchKind::TrivialCopy: {
@@ -771,13 +771,13 @@ void ItaniumEHLowering::lowerInitCatchParam(cir::InitCatchParamOp op) {
                                              cir::CastKind::bitcast, exnPtr);
     auto loadOp = cir::LoadOp::create(builder, loc, elementType, srcPtr);
     cir::StoreOp::create(builder, loc, loadOp.getResult(), paramAddr, {}, {},
-                         {}, {});
+                         {}, {}, {});
     break;
   }
   case InitCatchKind::Pointer: {
     mlir::Value casted = cir::CastOp::create(builder, loc, elementType,
                                              cir::CastKind::bitcast, exnPtr);
-    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {});
+    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {});
     break;
   }
   case InitCatchKind::Objc:
diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
index a21394dc62332..984e60a98dcef 100644
--- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp
@@ -956,16 +956,18 @@ class CIRCleanupScopeOpFlattening
           rewriter.setInsertionPoint(exitOp);
           cir::StoreOp::create(rewriter, loc, operand, alloca,
                                /*isVolatile=*/false,
+                               /*isNontemporal=*/false,
                                /*alignment=*/mlir::IntegerAttr(),
                                cir::SyncScopeKindAttr(), cir::MemOrderAttr());
         }
 
         // Reload the value from the temporary alloca in the destination block.
         rewriter.setInsertionPointToEnd(destBlock);
-        auto loaded = cir::LoadOp::create(
-            rewriter, loc, alloca, /*isDeref=*/false,
-            /*isVolatile=*/false, /*alignment=*/mlir::IntegerAttr(),
-            cir::SyncScopeKindAttr(), cir::MemOrderAttr());
+        auto loaded =
+            cir::LoadOp::create(rewriter, loc, alloca, /*isDeref=*/false,
+                                /*isVolatile=*/false, /*isNontemporal=*/false,
+                                /*alignment=*/mlir::IntegerAttr(),
+                                cir::SyncScopeKindAttr(), cir::MemOrderAttr());
         returnValues.push_back(loaded);
       }
     }
@@ -1274,10 +1276,11 @@ class CIRCleanupScopeOpFlattening
         rewriter.setInsertionPointToEnd(exitBlock);
 
         // Load the destination slot value.
-        auto slotValue = cir::LoadOp::create(
-            rewriter, loc, destSlot, /*isDeref=*/false,
-            /*isVolatile=*/false, /*alignment=*/mlir::IntegerAttr(),
-            cir::SyncScopeKindAttr(), cir::MemOrderAttr());
+        auto slotValue =
+            cir::LoadOp::create(rewriter, loc, destSlot, /*isDeref=*/false,
+                                /*isVolatile=*/false, /*isNontemporal=*/false,
+                                /*alignment=*/mlir::IntegerAttr(),
+                                cir::SyncScopeKindAttr(), cir::MemOrderAttr());
 
         // Create destination blocks for each exit and collect switch case info.
         llvm::SmallVector<mlir::APInt, 8> caseValues;
@@ -1306,6 +1309,7 @@ class CIRCleanupScopeOpFlattening
               rewriter, loc, cir::IntAttr::get(s32Type, exit.destinationId));
           cir::StoreOp::create(rewriter, loc, destIdConst, destSlot,
                                /*isVolatile=*/false,
+                               /*isNontemporal=*/false,
                                /*alignment=*/mlir::IntegerAttr(),
                                cir::SyncScopeKindAttr(), cir::MemOrderAttr());
           rewriter.replaceOpWithNewOp<cir::BrOp>(exit.exitOp, cleanupEntry);
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp
index 5c54103c60247..6edb5cc8425bd 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp
@@ -380,6 +380,7 @@ void LowerItaniumCXXABI::lowerGetMethod(
     mlir::Value vtablePtr =
         cir::LoadOp::create(b, loc, vtablePtrPtr, /*isDeref=*/false,
                             /*isVolatile=*/false,
+                            /*isNontemporal=*/false,
                             /*alignment=*/mlir::IntegerAttr(),
                             /*sync_scope=*/cir::SyncScopeKindAttr{},
                             /*mem_order=*/cir::MemOrderAttr());
@@ -407,6 +408,7 @@ void LowerItaniumCXXABI::lowerGetMethod(
                                              cir::CastKind::bitcast, vfpAddr);
     auto fnPtr = cir::LoadOp::create(b, loc, vfpPtr,
                                      /*isDeref=*/false, /*isVolatile=*/false,
+                                     /*isNontemporal=*/false,
                                      /*alignment=*/mlir::IntegerAttr(),
                                      /*sync_scope=*/cir::SyncScopeKindAttr{},
                                      /*mem_order=*/cir::MemOrderAttr());
@@ -780,6 +782,7 @@ static mlir::Value buildDynamicCastToVoidAfterNullCheck(
       builder, loc, vptrPtr,
       /*isDeref=*/false,
       /*is_volatile=*/false,
+      /*isNontemporal=*/false,
       /*alignment=*/builder.getI64IntegerAttr(vtableElemAlign),
       /*sync_scope=*/cir::SyncScopeKindAttr(),
       /*mem_order=*/cir::MemOrderAttr());
@@ -793,6 +796,7 @@ static mlir::Value buildDynamicCastToVoidAfterNullCheck(
       builder, loc, offsetToTopSlotPtr,
       /*isDeref=*/false,
       /*is_volatile=*/false,
+      /*isNontemporal=*/false,
       /*alignment=*/builder.getI64IntegerAttr(vtableElemAlign),
       /*sync_scope=*/cir::SyncScopeKindAttr(),
       /*mem_order=*/cir::MemOrderAttr());
@@ -902,6 +906,7 @@ mlir::Value LowerItaniumCXXABI::readArrayCookieImpl(
       builder, loc, countPtrTy, cir::CastKind::bitcast, countBytePtr);
   return cir::LoadOp::create(
       builder, loc, countPtr, /*isDeref=*/false, /*isVolatile=*/false,
+      /*isNontemporal=*/false,
       builder.getI64IntegerAttr(countAlignment.getQuantity()),
       cir::SyncScopeKindAttr(), cir::MemOrderAttr());
 }
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index c4e98e299dfc1..515ac39a8de67 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1860,15 +1860,12 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite(
 
   assert(!cir::MissingFeatures::lowerModeOptLevel());
 
-  // TODO: nontemporal.
-  assert(!cir::MissingFeatures::opLoadStoreNontemporal());
-
   std::optional<llvm::StringRef> llvmSyncScope =
       getLLVMSyncScope(op.getSyncScope());
 
   mlir::LLVM::LoadOp newLoad = mlir::LLVM::LoadOp::create(
       rewriter, op->getLoc(), llvmTy, adaptor.getAddr(), alignment,
-      op.getIsVolatile(), /*isNonTemporal=*/false,
+      op.getIsVolatile(), /*isNonTemporal=*/op.getNontemporal(),
       /*isInvariant=*/false, /*isInvariantGroup=*/false, ordering,
       llvmSyncScope.value_or(std::string()));
 
@@ -1916,8 +1913,6 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite(
   // Convert adapted value to its memory type if needed.
   mlir::Value value = emitToMemory(rewriter, dataLayout,
                                    op.getValue().getType(), adaptor.getValue());
-  // TODO: nontemporal.
-  assert(!cir::MissingFeatures::opLoadStoreNontemporal());
   assert(!cir::MissingFeatures::opLoadStoreTbaa());
 
   std::optional<llvm::StringRef> llvmSyncScope =
@@ -1926,8 +1921,8 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite(
   mlir::LLVM::StoreOp storeOp = mlir::LLVM::StoreOp::create(
       rewriter, op->getLoc(), value, adaptor.getAddr(), alignment,
       op.getIsVolatile(),
-      /*isNonTemporal=*/false, /*isInvariantGroup=*/false, memorder,
-      llvmSyncScope.value_or(std::string()));
+      /*isNonTemporal=*/op.getNontemporal(), /*isInvariantGroup=*/false,
+      memorder, llvmSyncScope.value_or(std::string()));
   rewriter.replaceOp(op, storeOp);
   assert(!cir::MissingFeatures::opLoadStoreTbaa());
   return mlir::LogicalResult::success();
diff --git a/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp b/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp
new file mode 100644
index 0000000000000..ec834049ecc44
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+
+signed char sc;
+unsigned char uc;
+signed short ss;
+unsigned short us;
+signed int si;
+unsigned int ui;
+signed long long sll;
+unsigned long long ull;
+float f1, f2;
+double d1, d2;
+
+void test_nontemporal_store() {
+// CIR-LABEL: cir.func {{.*}}@_Z22test_nontemporal_storev
+// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !u8i, !cir.ptr<!u8i>
+// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !u8i, !cir.ptr<!u8i>
+// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !s8i, !cir.ptr<!s8i>
+// CIR: cir.store nontemporal align(2) {{%.*}}, {{%.*}} : !u16i, !cir.ptr<!u16i>
+// CIR: cir.store nontemporal align(4) {{%.*}}, {{%.*}} : !s32i, !cir.ptr<!s32i>
+// CIR: cir.store nontemporal align(8) {{%.*}}, {{%.*}} : !u64i, !cir.ptr<!u64i>
+// CIR: cir.store nontemporal align(4) {{%.*}}, {{%.*}} : !cir.float, !cir.ptr<!cir.float>
+// CIR: cir.store nontemporal align(8) {{%.*}}, {{%.*}} : !cir.double, !cir.ptr<!cir.double>
+// CIR: cir.return
+
+// LLVM-LABEL: define dso_local void @_Z22test_nontemporal_storev
+// LLVM: store i8 1, ptr @uc, align 1, !nontemporal
+// LLVM: store i8 1, ptr @uc, align 1, !nontemporal
+// LLVM: store i8 1, ptr @sc, align 1, !nontemporal
+// LLVM: store i16 1, ptr @us, align 2, !nontemporal
+// LLVM: store i32 1, ptr @si, align 4, !nontemporal
+// LLVM: store i64 1, ptr @ull, align 8, !nontemporal
+// LLVM: store float 1.0{{.*}}, ptr @f1, align 4, !nontemporal
+// LLVM: store double 1.0{{.*}}, ptr @d1, align 8, !nontemporal
+// LLVM: ret void
+
+  __builtin_nontemporal_store(true, &uc);
+  __builtin_nontemporal_store(1, &uc);
+  __builtin_nontemporal_store(1, &sc);
+  __builtin_nontemporal_store(1, &us);
+  __builtin_nontemporal_store(1, &si);
+  __builtin_nontemporal_store(1, &ull);
+  __builtin_nontemporal_store(1.0, &f1);
+  __builtin_nontemporal_store(1.0, &d1);
+}
+
+void test_nontemporal_load() {
+// CIR-LABEL: cir.func {{.*}}@_Z21test_nontemporal_loadv
+// CIR: cir.load nontemporal align(1) {{%.*}} : !cir.ptr<!s8i>, !s8i
+// CIR: cir.load nontemporal align(1) {{%.*}} : !cir.ptr<!u8i>, !u8i
+// CIR: cir.load nontemporal align(2) {{%.*}} : !cir.ptr<!s16i>, !s16i
+// CIR: cir.load nontemporal align(4) {{%.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.load nontemporal align(8) {{%.*}} : !cir.ptr<!s64i>, !s64i
+// CIR: cir.load nontemporal align(4) {{%.*}} : !cir.ptr<!cir.float>, !cir.float
+// CIR: cir.load nontemporal align(8) {{%.*}} : !cir.ptr<!cir.double>, !cir.double
+// CIR: cir.return
+
+// LLVM-LABEL: define dso_local void @_Z21test_nontemporal_loadv
+// LLVM: load i8, ptr @sc, align 1, !nontemporal
+// LLVM: load i8, ptr @uc, align 1, !nontemporal
+// LLVM: load i16, ptr @ss, align 2, !nontemporal
+// LLVM: load i32, ptr @ui, align 4, !nontemporal
+// LLVM: load i64, ptr @sll, align 8, !nontemporal
+// LLVM: load float, ptr @f2, align 4, !nontemporal
+// LLVM: load double, ptr @d2, align 8, !nontemporal
+// LLVM: ret void
+
+  uc = __builtin_nontemporal_load(&sc);
+  sc = __builtin_nontemporal_load(&uc);
+  us = __builtin_nontemporal_load(&ss);
+  si = __builtin_nontemporal_load(&ui);
+  ull = __builtin_nontemporal_load(&sll);
+  f1 = __builtin_nontemporal_load(&f2);
+  d1 = __builtin_nontemporal_load(&d2);
+}

>From 1dbf2b01cbc5b846d1b672e333b272d59876e04e Mon Sep 17 00:00:00 2001
From: Jianjian GUAN <[email protected]>
Date: Tue, 19 May 2026 11:40:53 +0800
Subject: [PATCH 2/3] Fix rebase: add nontemporal arg to new StoreOp::create calls

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp       | 1 +
 clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp
index 3c654761b9903..4db2d7259c6ba 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp
@@ -1067,6 +1067,7 @@ static mlir::Value packArgsIntoNVPTXFormatBuffer(CIRGenFunction &cgf,
         dataLayout.getABITypeAlign(argTypes[i]).value());
     cir::StoreOp::create(builder, loc, arg.getKnownRValue().getValue(), member,
                          /*is_volatile=*/false,
+                         /*isNontemporal=*/false,
                          builder.getAlignmentAttr(abiAlign),
                          /*sync_scope=*/cir::SyncScopeKindAttr{},
                          /*mem_order=*/cir::MemOrderAttr{});
diff --git a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp
index b586a281ca91b..918eecb3eee49 100644
--- a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp
@@ -641,7 +641,7 @@ ItaniumEHLowering::lowerConstructCatchParam(cir::ConstructCatchParamOp op,
     mlir::Value casted =
         cir::CastOp::create(builder, loc, paramAddrType.getPointee(),
                             cir::CastKind::bitcast, exnObj);
-    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {});
+    cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {});
     op.erase();
     return success();
   }

>From 646c0729eaeba0d5f4dfd85b3b6e9d3e05c0057f Mon Sep 17 00:00:00 2001
From: Jianjian GUAN <[email protected]>
Date: Tue, 2 Jun 2026 16:52:45 +0800
Subject: [PATCH 3/3] Address review comments: rename attr to is_nontemporal, carry flag on LValue

---
 clang/include/clang/CIR/Dialect/IR/CIROps.td       |  8 ++++----
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp            | 14 ++++++++------
 clang/lib/CIR/CodeGen/CIRGenExpr.cpp               |  5 +++--
 clang/lib/CIR/CodeGen/CIRGenValue.h                |  7 +++++++
 .../lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp  |  4 ++--
 5 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index b95622f7a2b89..63773791b66a3 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -709,7 +709,7 @@ def CIR_LoadOp : CIR_Op<"load", [
                            [MemRead]>:$addr,
                        UnitAttr:$isDeref,
                        UnitAttr:$is_volatile,
-                       UnitAttr:$nontemporal,
+                       UnitAttr:$is_nontemporal,
                        OptionalAttr<I64Attr>:$alignment,
                        OptionalAttr<CIR_SyncScopeKind>:$sync_scope,
                        OptionalAttr<CIR_MemOrder>:$mem_order);
@@ -718,7 +718,7 @@ def CIR_LoadOp : CIR_Op<"load", [
   let assemblyFormat = [{
     (`deref` $isDeref^)?
     (`volatile` $is_volatile^)?
-    (`nontemporal` $nontemporal^)?
+    (`nontemporal` $is_nontemporal^)?
     (`align` `(` $alignment^ `)`)?
     (`syncscope` `(` $sync_scope^ `)`)?
     (`atomic` `(` $mem_order^ `)`)?
@@ -810,14 +810,14 @@ def CIR_StoreOp : CIR_Op<"store", [
                        Arg<CIR_PointerType, "the address to store the value",
                            [MemWrite]>:$addr,
                        UnitAttr:$is_volatile,
-                       UnitAttr:$nontemporal,
+                       UnitAttr:$is_nontemporal,
                        OptionalAttr<I64Attr>:$alignment,
                        OptionalAttr<CIR_SyncScopeKind>:$sync_scope,
                        OptionalAttr<CIR_MemOrder>:$mem_order);
 
   let assemblyFormat = [{
     (`volatile` $is_volatile^)?
-    (`nontemporal` $nontemporal^)?
+    (`nontemporal` $is_nontemporal^)?
     (`align` `(` $alignment^ `)`)?
     (`syncscope` `(` $sync_scope^ `)`)?
     (`atomic` `(` $mem_order^ `)`)?
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index dd48979eeb00a..9674c0af800e1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -2093,18 +2093,20 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     return errorBuiltinNYI(*this, e, builtinID);
   case Builtin::BI__builtin_nontemporal_load: {
     Address addr = emitPointerWithAlignment(e->getArg(0));
-    mlir::Value val = emitLoadOfScalar(
-        addr, /*isVolatile=*/false, e->getType(), e->getExprLoc(),
-        LValueBaseInfo(AlignmentSource::Type), /*isNontemporal=*/true);
+    LValue lv = makeAddrLValue(addr, e->getType(),
+                               LValueBaseInfo(AlignmentSource::Type));
+    lv.setNontemporal(true);
+    mlir::Value val = emitLoadOfScalar(lv, e->getExprLoc());
     return RValue::get(val);
   }
   case Builtin::BI__builtin_nontemporal_store: {
     mlir::Value val = emitScalarExpr(e->getArg(0));
     Address addr = emitPointerWithAlignment(e->getArg(1));
     val = emitToMemory(val, e->getArg(0)->getType());
-    emitStoreOfScalar(val, addr, /*isVolatile=*/false, e->getArg(0)->getType(),
-                      LValueBaseInfo(AlignmentSource::Type), /*isInit=*/false,
-                      /*isNontemporal=*/true);
+    LValue lv = makeAddrLValue(addr, e->getArg(0)->getType(),
+                               LValueBaseInfo(AlignmentSource::Type));
+    lv.setNontemporal(true);
+    emitStoreOfScalar(val, lv, /*isInit=*/false);
     return RValue::get(nullptr);
   }
   case Builtin::BI__c11_atomic_is_lock_free:
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index f92ba41fcd146..a1d522212bd90 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -731,7 +731,7 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, 
LValue lvalue,
 
   emitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(),
                     lvalue.getType(), lvalue.getBaseInfo(), isInit,
-                    /*isNontemporal=*/false);
+                    lvalue.isNontemporal());
 }
 
 mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile,
@@ -779,7 +779,8 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(LValue lvalue,
                                              SourceLocation loc) {
   assert(!cir::MissingFeatures::opLoadStoreTbaa());
   return emitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
-                          lvalue.getType(), loc, lvalue.getBaseInfo());
+                          lvalue.getType(), loc, lvalue.getBaseInfo(),
+                          lvalue.isNontemporal());
 }
 
 /// Given an expression that represents a value lvalue, this
diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h 
b/clang/lib/CIR/CodeGen/CIRGenValue.h
index e70dac5851189..b291b8c76f1ad 100644
--- a/clang/lib/CIR/CodeGen/CIRGenValue.h
+++ b/clang/lib/CIR/CodeGen/CIRGenValue.h
@@ -173,6 +173,9 @@ class LValue {
   mlir::Type elementType;
   LValueBaseInfo baseInfo;
   const CIRGenBitFieldInfo *bitFieldInfo{nullptr};
+  // This flag shows if nontemporal loads/stores should be used when accessing
+  // this lvalue.
+  bool nontemporal;
 
   void initialize(clang::QualType type, clang::Qualifiers quals,
                   clang::CharUnits alignment, LValueBaseInfo baseInfo) {
@@ -187,6 +190,7 @@ class LValue {
     assert(this->alignment == alignment.getQuantity() &&
            "Alignment exceeds allowed max!");
     this->baseInfo = baseInfo;
+    this->nontemporal = false;
   }
 
 public:
@@ -200,6 +204,9 @@ class LValue {
 
   bool isVolatileQualified() const { return quals.hasVolatile(); }
 
+  bool isNontemporal() const { return nontemporal; }
+  void setNontemporal(bool v) { nontemporal = v; }
+
   unsigned getVRQualifiers() const {
     return quals.getCVRQualifiers() & ~clang::Qualifiers::Const;
   }
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp 
b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 515ac39a8de67..1fe1d7dd52f97 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -1865,7 +1865,7 @@ mlir::LogicalResult 
CIRToLLVMLoadOpLowering::matchAndRewrite(
 
   mlir::LLVM::LoadOp newLoad = mlir::LLVM::LoadOp::create(
       rewriter, op->getLoc(), llvmTy, adaptor.getAddr(), alignment,
-      op.getIsVolatile(), /*isNonTemporal=*/op.getNontemporal(),
+      op.getIsVolatile(), /*isNonTemporal=*/op.getIsNontemporal(),
       /*isInvariant=*/false, /*isInvariantGroup=*/false, ordering,
       llvmSyncScope.value_or(std::string()));
 
@@ -1921,7 +1921,7 @@ mlir::LogicalResult 
CIRToLLVMStoreOpLowering::matchAndRewrite(
   mlir::LLVM::StoreOp storeOp = mlir::LLVM::StoreOp::create(
       rewriter, op->getLoc(), value, adaptor.getAddr(), alignment,
       op.getIsVolatile(),
-      /*isNonTemporal=*/op.getNontemporal(), /*isInvariantGroup=*/false,
+      /*isNonTemporal=*/op.getIsNontemporal(), /*isInvariantGroup=*/false,
       memorder, llvmSyncScope.value_or(std::string()));
   rewriter.replaceOp(op, storeOp);
   assert(!cir::MissingFeatures::opLoadStoreTbaa());

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to