mzolotukhin created this revision.
mzolotukhin added reviewers: hfinkel, doug.gregor, t.p.northover, ab, mcrosier.
mzolotukhin added a subscriber: cfe-commits.
Herald added a subscriber: aemerson.

Currently there is no way to generate nontemporal memory accesses for some
architectures, e.g. for AArch64. In contrast to x86, AArch64 doesn't have special
intrinsics for this, and the suggested solution is to use such an attribute (see ARM
ACLE 2.0, section 13.1.6). The attribute would result in generating
'!nontemporal' attribute in IR, which then will (hopefully) live through
optimizations till backend, where it will be lowered to a non-temporal
instruction (for AArch64 - to STNP). I have committed a couple of patches for
vectorizers to preserve this attribute, and it seems that no other
transformation removes it.

So, is introducing a new type attribute the right approach for this problem?

Also, since I don't have much experience in front-end, I'd appreciate any help
with the patch itself to get it ready to be committed. Specifically, I currently
have the following questions:
1) What tests should I add (examples would be appreciated)?
2) How does one implement constraints on how the attribute can be used, what
should the constraints be in this case, and how should they be properly implemented?
3) How can I check if I covered all places where this attribute might be used in
codegen? I.e. I seem to cover array-subscript and pointer-dereference
expressions, which are probably the only cases I care about, but I could easily
have missed something.

Any other feedback is also welcome!

Thanks,
Michael

http://reviews.llvm.org/D12221

Files:
  include/clang/AST/Type.h
  include/clang/Basic/Attr.td
  lib/AST/Type.cpp
  lib/AST/TypePrinter.cpp
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CGValue.h
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenFunction.h
  lib/Sema/SemaType.cpp
  test/CodeGen/nontemporal.cpp

Index: test/CodeGen/nontemporal.cpp
===================================================================
--- /dev/null
+++ test/CodeGen/nontemporal.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+typedef float * PtrT;
+typedef float * __attribute__((nontemporal)) NonTemporalPtrT;
+
+// CHECK-LABEL: @_Z3fooPfS_S_i
+void foo(NonTemporalPtrT a, NonTemporalPtrT b, PtrT c, int N) {
+// CHECK-DAG:  [[VALUE_B:%.+]] = load float, float* %{{[0-9a-z._]+}}, align 4, !nontemporal ![[NT:[0-9]+]]
+// CHECK-DAG:  [[VALUE_C:%.+]] = load float, float* %{{[0-9a-z._]+}}, align 4{{$}}
+// CHECK:      [[VALUE_SUB:%.+]] = fsub float [[VALUE_B]], [[VALUE_C]]
+// CHECK:      store float [[VALUE_SUB]], float* %{{[0-9a-z._]+}}, align 4, !nontemporal ![[NT]]
+  a[N] = b[N] - c[N];
+}
+
+// CHECK-LABEL: @_Z4foo2PfS_S_
+void foo2(NonTemporalPtrT a, NonTemporalPtrT b, PtrT c) {
+// CHECK-DAG:  [[VALUE_B:%.+]] = load float, float* %{{[0-9]+}}, align 4, !nontemporal ![[NT:[0-9]+]]
+// CHECK-DAG:  [[VALUE_C:%.+]] = load float, float* %{{[0-9]+}}, align 4{{$}}
+// CHECK:      [[VALUE_SUB:%.+]] = fsub float [[VALUE_B]], [[VALUE_C]]
+// CHECK:      store float [[VALUE_SUB]], float* %{{[0-9]+}}, align 4, !nontemporal ![[NT]]
+  *a = *b - *c;
+}
Index: lib/Sema/SemaType.cpp
===================================================================
--- lib/Sema/SemaType.cpp
+++ lib/Sema/SemaType.cpp
@@ -644,6 +644,7 @@
 
     // Objective-C __kindof does not get distributed.
     case AttributeList::AT_ObjCKindOf:
+    case AttributeList::AT_TypeNonTemporal:
       continue;
 
     default:
@@ -4436,6 +4437,8 @@
     return AttributeList::AT_TypeNonNull;
   case AttributedType::attr_nullable:
     return AttributeList::AT_TypeNullable;
+  case AttributedType::attr_nontemporal:
+    return AttributeList::AT_TypeNonTemporal;
   case AttributedType::attr_null_unspecified:
     return AttributeList::AT_TypeNullUnspecified;
   case AttributedType::attr_objc_kindof:
@@ -6191,6 +6194,12 @@
       }
       break;
 
+    case AttributeList::AT_TypeNonTemporal:
+      type = state.getSema().Context.getAttributedType(
+          AttributedType::attr_nontemporal, type, type);
+      attr.setUsedAsTypeAttr();
+      break;
+
     case AttributeList::AT_ObjCKindOf:
       // '__kindof' must be part of the decl-specifiers.
       switch (TAL) {
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -1497,12 +1497,14 @@
   //===--------------------------------------------------------------------===//
 
   LValue MakeAddrLValue(llvm::Value *V, QualType T,
-                        CharUnits Alignment = CharUnits()) {
+                        CharUnits Alignment = CharUnits(),
+                        bool isNonTemporal = false) {
     return LValue::MakeAddr(V, T, Alignment, getContext(),
-                            CGM.getTBAAInfo(T));
+                            CGM.getTBAAInfo(T), isNonTemporal);
   }
 
-  LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T);
+  LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T,
+                                    bool isNonTemporal = false);
 
   /// CreateTempAlloca - This creates a alloca and inserts it into the entry
   /// block. The caller is responsible for setting an appropriate alignment on
@@ -2343,7 +2345,8 @@
                                 SourceLocation Loc,
                                 llvm::MDNode *TBAAInfo = nullptr,
                                 QualType TBAABaseTy = QualType(),
-                                uint64_t TBAAOffset = 0);
+                                uint64_t TBAAOffset = 0,
+                                bool isNonTemporal = false);
 
   /// EmitLoadOfScalar - Load a scalar value from an address, taking
   /// care to appropriately convert from the memory representation to
@@ -2358,7 +2361,7 @@
                          bool Volatile, unsigned Alignment, QualType Ty,
                          llvm::MDNode *TBAAInfo = nullptr, bool isInit = false,
                          QualType TBAABaseTy = QualType(),
-                         uint64_t TBAAOffset = 0);
+                         uint64_t TBAAOffset = 0, bool isNonTemporal = false);
 
   /// EmitStoreOfScalar - Store a scalar value to an address, taking
   /// care to appropriately convert from the memory representation to
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -91,16 +91,18 @@
   }
 }
 
-LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) {
+LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T,
+                                                   bool isNonTemporal) {
   CharUnits Alignment;
   if (CGM.getCXXABI().isTypeInfoCalculable(T)) {
     Alignment = getContext().getTypeAlignInChars(T);
     unsigned MaxAlign = getContext().getLangOpts().MaxTypeAlign;
     if (MaxAlign && Alignment.getQuantity() > MaxAlign &&
         !getContext().isAlignmentRequired(T))
       Alignment = CharUnits::fromQuantity(MaxAlign);
   }
-  return LValue::MakeAddr(V, T, Alignment, getContext(), CGM.getTBAAInfo(T));
+  return LValue::MakeAddr(V, T, Alignment, getContext(), CGM.getTBAAInfo(T),
+                          isNonTemporal);
 }
 
 llvm::Type *CodeGenFunction::ConvertTypeForMem(QualType T) {
Index: lib/CodeGen/CGValue.h
===================================================================
--- lib/CodeGen/CGValue.h
+++ lib/CodeGen/CGValue.h
@@ -137,6 +137,9 @@
   // this is the alignment of the whole vector.)
   int64_t Alignment;
 
+  // Non-temporality attribute to use when accessing this lvalue.
+  bool isNonTemporal : 1;
+
   // objective-c's ivar
   bool Ivar:1;
   
@@ -170,13 +173,16 @@
 private:
   void Initialize(QualType Type, Qualifiers Quals,
                   CharUnits Alignment,
-                  llvm::MDNode *TBAAInfo = nullptr) {
+                  llvm::MDNode *TBAAInfo = nullptr,
+                  bool isNonTemporal = false) {
     this->Type = Type;
     this->Quals = Quals;
     this->Alignment = Alignment.getQuantity();
     assert(this->Alignment == Alignment.getQuantity() &&
            "Alignment exceeds allowed max!");
 
+    this->isNonTemporal = isNonTemporal;
+
     // Initialize Objective-C flags.
     this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false;
     this->ImpreciseLifetime = false;
@@ -260,6 +266,8 @@
 
   CharUnits getAlignment() const { return CharUnits::fromQuantity(Alignment); }
   void setAlignment(CharUnits A) { Alignment = A.getQuantity(); }
+  bool getNT() const { return isNonTemporal; }
+  void setNT(bool nt) { isNonTemporal = nt; }
 
   // simple lvalue
   llvm::Value *getAddress() const { assert(isSimple()); return V; }
@@ -294,20 +302,21 @@
 
   static LValue MakeAddr(llvm::Value *address, QualType type,
                          CharUnits alignment, ASTContext &Context,
-                         llvm::MDNode *TBAAInfo = nullptr) {
+                         llvm::MDNode *TBAAInfo = nullptr,
+                         bool isNonTemporal = false) {
     Qualifiers qs = type.getQualifiers();
     qs.setObjCGCAttr(Context.getObjCGCAttrKind(type));
 
     LValue R;
     R.LVType = Simple;
     assert(address->getType()->isPointerTy());
     R.V = address;
-    R.Initialize(type, qs, alignment, TBAAInfo);
+    R.Initialize(type, qs, alignment, TBAAInfo, isNonTemporal);
     return R;
   }
 
-  static LValue MakeVectorElt(llvm::Value *Vec, llvm::Value *Idx,
-                              QualType type, CharUnits Alignment) {
+  static LValue MakeVectorElt(llvm::Value *Vec, llvm::Value *Idx, QualType type,
+                              CharUnits Alignment, bool isNonTemporal = false) {
     LValue R;
     R.LVType = VectorElt;
     R.V = Vec;
@@ -317,7 +326,8 @@
   }
 
   static LValue MakeExtVectorElt(llvm::Value *Vec, llvm::Constant *Elts,
-                                 QualType type, CharUnits Alignment) {
+                                 QualType type, CharUnits Alignment,
+                                 bool isNonTemporal = false) {
     LValue R;
     R.LVType = ExtVectorElt;
     R.V = Vec;
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -1059,7 +1059,8 @@
   return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
                           lvalue.getAlignment().getQuantity(),
                           lvalue.getType(), Loc, lvalue.getTBAAInfo(),
-                          lvalue.getTBAABaseType(), lvalue.getTBAAOffset());
+                          lvalue.getTBAABaseType(), lvalue.getTBAAOffset(),
+                          lvalue.getNT());
 }
 
 static bool hasBooleanRepresentation(QualType Ty) {
@@ -1124,7 +1125,8 @@
                                                SourceLocation Loc,
                                                llvm::MDNode *TBAAInfo,
                                                QualType TBAABaseType,
-                                               uint64_t TBAAOffset) {
+                                               uint64_t TBAAOffset,
+                                               bool isNonTemporal) {
   // For better performance, handle vector loads differently.
   if (Ty->isVectorType()) {
     llvm::Value *V;
@@ -1168,6 +1170,16 @@
     Load->setVolatile(true);
   if (Alignment)
     Load->setAlignment(Alignment);
+  if (isNonTemporal) {
+    llvm::LLVMContext &C = Load->getContext();
+    llvm::Module *M = Load->getModule();
+    SmallVector<llvm::Metadata *, 1> Elts;
+    Elts.push_back(llvm::ConstantAsMetadata::get(
+        llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), 1)));
+    llvm::MDNode *Node = llvm::MDNode::get(C, Elts);
+    Load->setMetadata(M->getMDKindID("nontemporal"), Node);
+  }
+
   if (TBAAInfo) {
     llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
                                                       TBAAOffset);
@@ -1239,7 +1251,8 @@
                                         bool Volatile, unsigned Alignment,
                                         QualType Ty, llvm::MDNode *TBAAInfo,
                                         bool isInit, QualType TBAABaseType,
-                                        uint64_t TBAAOffset) {
+                                        uint64_t TBAAOffset,
+                                        bool isNonTemporal) {
 
   // Handle vectors differently to get better performance.
   if (Ty->isVectorType()) {
@@ -1280,6 +1293,16 @@
   llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile);
   if (Alignment)
     Store->setAlignment(Alignment);
+  if (isNonTemporal) {
+    llvm::LLVMContext &C = Store->getContext();
+    llvm::Module *M = Store->getModule();
+    SmallVector<llvm::Metadata *, 1> Elts;
+    Elts.push_back(llvm::ConstantAsMetadata::get(
+        llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), 1)));
+    llvm::MDNode *Node = llvm::MDNode::get(C, Elts);
+    Store->setMetadata(M->getMDKindID("nontemporal"), Node);
+  }
+
   if (TBAAInfo) {
     llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
                                                       TBAAOffset);
@@ -1293,7 +1316,7 @@
   EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(),
                     lvalue.getAlignment().getQuantity(), lvalue.getType(),
                     lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(),
-                    lvalue.getTBAAOffset());
+                    lvalue.getTBAAOffset(), lvalue.getNT());
 }
 
 /// EmitLoadOfLValue - Given an expression that represents a value lvalue, this
@@ -1323,6 +1346,16 @@
     llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddr(),
                                               LV.isVolatileQualified());
     Load->setAlignment(LV.getAlignment().getQuantity());
+    if (LV.getNT()) {
+      llvm::LLVMContext &C = Load->getContext();
+      llvm::Module *M = Load->getModule();
+      SmallVector<llvm::Metadata *, 1> Elts;
+      Elts.push_back(llvm::ConstantAsMetadata::get(
+          llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), 1)));
+      llvm::MDNode *Node = llvm::MDNode::get(C, Elts);
+      Load->setMetadata(M->getMDKindID("nontemporal"), Node);
+    }
+
     return RValue::get(Builder.CreateExtractElement(Load, LV.getVectorIdx(),
                                                     "vecext"));
   }
@@ -2034,7 +2067,10 @@
     QualType T = E->getSubExpr()->getType()->getPointeeType();
     assert(!T.isNull() && "CodeGenFunction::EmitUnaryOpLValue: Illegal type");
 
-    LValue LV = MakeNaturalAlignAddrLValue(EmitScalarExpr(E->getSubExpr()), T);
+    LValue LV = MakeNaturalAlignAddrLValue(
+        EmitScalarExpr(E->getSubExpr()), T,
+        E->getSubExpr()->IgnoreImpCasts()->getType()->hasNonTemporalAttr(
+            getContext()));
     LV.getQuals().setAddressSpace(ExprTy.getAddressSpace());
 
     // We should not generate __weak write barrier on indirect reference
@@ -2538,15 +2574,18 @@
   assert(!T.isNull() &&
          "CodeGenFunction::EmitArraySubscriptExpr(): Illegal base type");
 
+  bool isNonTemporal =
+      E->getBase()->IgnoreImpCasts()->getType()->hasNonTemporalAttr(
+          getContext());
 
   // Limit the alignment to that of the result type.
   LValue LV;
   if (!ArrayAlignment.isZero()) {
     CharUnits Align = getContext().getTypeAlignInChars(T);
     ArrayAlignment = std::min(Align, ArrayAlignment);
-    LV = MakeAddrLValue(Address, T, ArrayAlignment);
+    LV = MakeAddrLValue(Address, T, ArrayAlignment, isNonTemporal);
   } else {
-    LV = MakeNaturalAlignAddrLValue(Address, T);
+    LV = MakeNaturalAlignAddrLValue(Address, T, isNonTemporal);
   }
 
   LV.getQuals().setAddressSpace(E->getBase()->getType().getAddressSpace());
Index: lib/AST/TypePrinter.cpp
===================================================================
--- lib/AST/TypePrinter.cpp
+++ lib/AST/TypePrinter.cpp
@@ -1212,6 +1212,11 @@
     OS << ')';
     break;
 
+  case AttributedType::attr_nontemporal: {
+    OS << "nontemporal";
+    break;
+  }
+
   case AttributedType::attr_vector_size: {
     OS << "__vector_size__(";
     if (const VectorType *vector =T->getEquivalentType()->getAs<VectorType>()) {
Index: lib/AST/Type.cpp
===================================================================
--- lib/AST/Type.cpp
+++ lib/AST/Type.cpp
@@ -2887,6 +2887,7 @@
   case attr_nullable:
   case attr_null_unspecified:
   case attr_objc_kindof:
+  case attr_nontemporal:
     return false;
 
   case attr_pcs:
@@ -3317,6 +3318,25 @@
   return LV;
 }
 
+bool Type::hasNonTemporalAttr(const ASTContext &context) const {
+  QualType type(this, 0);
+  do {
+    // Check whether this level of sugar is an attributed type carrying the
+    // nontemporal attribute.
+    if (auto attributed = dyn_cast<AttributedType>(type.getTypePtr())) {
+      if (attributed->getAttrKind() == AttributedType::attr_nontemporal)
+        return true;
+    }
+
+    // Desugar the type. If desugaring does nothing, we're done.
+    QualType desugared = type.getSingleStepDesugaredType(context);
+    if (desugared.getTypePtr() == type.getTypePtr())
+      return false;
+
+    type = desugared;
+  } while (true);
+}
+
 Optional<NullabilityKind> Type::getNullability(const ASTContext &context) const {
   QualType type(this, 0);
   do {
Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -1006,6 +1006,11 @@
   let Documentation = [TypeNullUnspecifiedDocs];
 }
 
+def TypeNonTemporal : TypeAttr {
+  let Spellings = [GCC<"nontemporal">];
+  let Documentation = [Undocumented];
+}
+
 def ObjCKindOf : TypeAttr {
   let Spellings = [Keyword<"__kindof">];
   let Documentation = [Undocumented];
Index: include/clang/AST/Type.h
===================================================================
--- include/clang/AST/Type.h
+++ include/clang/AST/Type.h
@@ -1913,6 +1913,13 @@
   /// be lost by canonicalization and desugaring.
   Optional<NullabilityKind> getNullability(const ASTContext &context) const;
 
+  /// Determine the non-temporality of the given type.
+  ///
+  /// The non-temporal attribute is only captured as sugar within the type
+  /// system, not as part of the canonical type, so it will be lost by
+  /// canonicalization and desugaring.
+  bool hasNonTemporalAttr(const ASTContext &context) const;
+
   /// Determine whether the given type can have a nullability
   /// specifier applied to it, i.e., if it is any kind of pointer type
   /// or a dependent type that could instantiate to any kind of
@@ -3611,6 +3618,7 @@
     attr_nullable,
     attr_null_unspecified,
     attr_objc_kindof,
+    attr_nontemporal,
   };
 
 private:
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to