https://github.com/llvm-beanz updated 
https://github.com/llvm/llvm-project/pull/101083

>From e8ec3e24e0061714b5dc440b1b92086965809483 Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.biene...@me.com>
Date: Mon, 29 Jul 2024 16:05:11 -0500
Subject: [PATCH 01/10] [HLSL] Implement output parameter

HLSL output parameters are denoted with the `inout` and `out` keywords
in the function declaration. When an argument is passed to an output
parameter, a temporary value is constructed for the argument.

For `inout` parameters the argument is initialized by casting the
argument expression to the parameter type. For `out` parameters the
argument is not initialized before the call.

In both cases on return of the function the temporary value is written
back to the argument lvalue expression through an optional casting
sequence if required.

This change introduces a new HLSLOutArgExpr ast node which represents
the output argument behavior. The OutArgExpr has two defined children:
the base expression and the writeback expression. The writeback
expression will either be or contain an OpaqueValueExpr child
expression which is used during code generation to represent the
temporary value.
---
 clang/include/clang/AST/ASTContext.h          |   8 +
 clang/include/clang/AST/Attr.h                |  38 ++--
 clang/include/clang/AST/Expr.h                |  61 +++++++
 clang/include/clang/AST/RecursiveASTVisitor.h |   3 +
 clang/include/clang/AST/TextNodeDumper.h      |   1 +
 clang/include/clang/Basic/Attr.td             |   3 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |   2 +
 clang/include/clang/Basic/Specifiers.h        |   6 +
 clang/include/clang/Basic/StmtNodes.td        |   3 +
 clang/include/clang/Sema/SemaHLSL.h           |   2 +
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/AST/ASTContext.cpp                  |  14 ++
 clang/lib/AST/Expr.cpp                        |  11 ++
 clang/lib/AST/ExprClassification.cpp          |   1 +
 clang/lib/AST/ExprConstant.cpp                |   1 +
 clang/lib/AST/ItaniumMangle.cpp               |  12 ++
 clang/lib/AST/StmtPrinter.cpp                 |   4 +
 clang/lib/AST/StmtProfile.cpp                 |   4 +
 clang/lib/AST/TextNodeDumper.cpp              |   4 +
 clang/lib/AST/TypePrinter.cpp                 |  20 ++-
 clang/lib/CodeGen/CGCall.cpp                  |  56 ++++++
 clang/lib/CodeGen/CGCall.h                    |  18 +-
 clang/lib/CodeGen/CGExpr.cpp                  |  25 +++
 clang/lib/CodeGen/CodeGenFunction.h           |   2 +
 clang/lib/Sema/SemaChecking.cpp               |  13 ++
 clang/lib/Sema/SemaDecl.cpp                   |   4 +
 clang/lib/Sema/SemaExceptionSpec.cpp          |   1 +
 clang/lib/Sema/SemaExpr.cpp                   |  19 +-
 clang/lib/Sema/SemaHLSL.cpp                   |  60 ++++++-
 clang/lib/Sema/SemaOverload.cpp               |   4 +
 clang/lib/Sema/SemaSwift.cpp                  |   3 +
 clang/lib/Sema/SemaType.cpp                   |   2 +
 clang/lib/Sema/TreeTransform.h                |  29 +++
 clang/lib/Serialization/ASTReaderStmt.cpp     |  22 ++-
 clang/lib/Serialization/ASTWriterStmt.cpp     |  12 ++
 clang/lib/StaticAnalyzer/Core/ExprEngine.cpp  |   3 +-
 clang/test/AST/HLSL/OutArgExpr.hlsl           |  65 +++++++
 .../BasicFeatures/OutputArguments.hlsl        | 128 +++++++++++++
 .../SemaHLSL/Language/OutputParameters.hlsl   |  34 ++++
 .../SemaHLSL/Language/TemplateOutArg.hlsl     | 169 ++++++++++++++++++
 clang/test/SemaHLSL/parameter_modifiers.hlsl  |   8 +-
 .../SemaHLSL/parameter_modifiers_ast.hlsl     |  20 +--
 clang/tools/libclang/CXCursor.cpp             |   1 +
 43 files changed, 854 insertions(+), 45 deletions(-)
 create mode 100644 clang/test/AST/HLSL/OutArgExpr.hlsl
 create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
 create mode 100644 clang/test/SemaHLSL/Language/OutputParameters.hlsl
 create mode 100644 clang/test/SemaHLSL/Language/TemplateOutArg.hlsl

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index 6d1c8ca8a2f961..e81c7170a022ce 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -1377,6 +1377,14 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// in the return type and parameter types.
   bool hasSameFunctionTypeIgnoringPtrSizes(QualType T, QualType U);
 
+  /// Get or construct a function type that is equivalent to the input type
+  /// except that the parameter ABI annotations are stripped.
+  QualType getFunctionTypeWithoutParamABIs(QualType T);
+
+  /// Determine if two function types are the same, ignoring parameter ABI
+  /// annotations.
+  bool hasSameFunctionTypeIgnoringParamABI(QualType T, QualType U);
+
   /// Return the uniqued reference to the type for a complex
   /// number with the specified element type.
   QualType getComplexType(QualType T) const;
diff --git a/clang/include/clang/AST/Attr.h b/clang/include/clang/AST/Attr.h
index 8e9b7ad8b46826..00e3c9d9ab347f 100644
--- a/clang/include/clang/AST/Attr.h
+++ b/clang/include/clang/AST/Attr.h
@@ -224,20 +224,7 @@ class ParameterABIAttr : public InheritableParamAttr {
                              InheritEvenIfAlreadyPresent) {}
 
 public:
-  ParameterABI getABI() const {
-    switch (getKind()) {
-    case attr::SwiftContext:
-      return ParameterABI::SwiftContext;
-    case attr::SwiftAsyncContext:
-      return ParameterABI::SwiftAsyncContext;
-    case attr::SwiftErrorResult:
-      return ParameterABI::SwiftErrorResult;
-    case attr::SwiftIndirectResult:
-      return ParameterABI::SwiftIndirectResult;
-    default:
-      llvm_unreachable("bad parameter ABI attribute kind");
-    }
-  }
+  ParameterABI getABI() const;
 
   static bool classof(const Attr *A) {
     return A->getKind() >= attr::FirstParameterABIAttr &&
@@ -379,6 +366,29 @@ inline const StreamingDiagnostic &operator<<(const 
StreamingDiagnostic &DB,
   DB.AddTaggedVal(reinterpret_cast<uint64_t>(At), DiagnosticsEngine::ak_attr);
   return DB;
 }
+
+inline ParameterABI ParameterABIAttr::getABI() const {
+  switch (getKind()) {
+  case attr::SwiftContext:
+    return ParameterABI::SwiftContext;
+  case attr::SwiftAsyncContext:
+    return ParameterABI::SwiftAsyncContext;
+  case attr::SwiftErrorResult:
+    return ParameterABI::SwiftErrorResult;
+  case attr::SwiftIndirectResult:
+    return ParameterABI::SwiftIndirectResult;
+  case attr::HLSLParamModifier: {
+    const auto *A = cast<HLSLParamModifierAttr>(this);
+    if (A->isOut())
+      return ParameterABI::HLSLOut;
+    if (A->isInOut())
+      return ParameterABI::HLSLInOut;
+    return ParameterABI::Ordinary;
+  }
+  default:
+    llvm_unreachable("bad parameter ABI attribute kind");
+  }
+}
 }  // end namespace clang
 
 #endif
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 5b813bfc2faf90..83eed4827cfe20 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -7061,6 +7061,67 @@ class ArraySectionExpr : public Expr {
   void setRBracketLoc(SourceLocation L) { RBracketLoc = L; }
 };
 
+/// This class represents temporary values used to represent inout and out
+/// arguments in HLSL. From the callee perspective these parameters are more or
+/// less __restrict__ T&. They are guaranteed to not alias any memory. inout
+/// parameters are initialized by the caller, and out parameters are references
+/// to uninitialized memory.
+///
+/// In the caller, the argument expression creates a temporary in local memory
+/// and the address of the temporary is passed into the callee. There may be
+/// implicit conversion sequences to initialize the temporary, and on expiration
+/// of the temporary an inverse conversion sequence is applied as a write-back
+/// conversion to the source l-value.
+class HLSLOutArgExpr : public Expr {
+  friend class ASTStmtReader;
+
+  Expr *Base;
+  Expr *Writeback;
+  OpaqueValueExpr *OpaqueVal;
+  bool IsInOut;
+
+  HLSLOutArgExpr(QualType Ty, Expr *B, Expr *WB, OpaqueValueExpr *OpV,
+                 bool IsInOut)
+      : Expr(HLSLOutArgExprClass, Ty, VK_LValue, OK_Ordinary), Base(B),
+        Writeback(WB), OpaqueVal(OpV), IsInOut(IsInOut) {
+    assert(!Ty->isDependentType() && "HLSLOutArgExpr given a dependent type!");
+  }
+
+  explicit HLSLOutArgExpr(EmptyShell Shell)
+      : Expr(HLSLOutArgExprClass, Shell) {}
+
+public:
+  static HLSLOutArgExpr *Create(const ASTContext &C, QualType Ty, Expr *Base,
+                                bool IsInOut, Expr *WB, OpaqueValueExpr *OpV);
+  static HLSLOutArgExpr *CreateEmpty(const ASTContext &Ctx);
+
+  const Expr *getBase() const { return Base; }
+  Expr *getBase() { return Base; }
+
+  const Expr *getWriteback() const { return Writeback; }
+  Expr *getWriteback() { return Writeback; }
+
+  const OpaqueValueExpr *getOpaqueValue() const { return OpaqueVal; }
+  OpaqueValueExpr *getOpaqueValue() { return OpaqueVal; }
+
+  bool isInOut() const { return IsInOut; }
+
+  SourceLocation getBeginLoc() const LLVM_READONLY {
+    return Base->getBeginLoc();
+  }
+
+  SourceLocation getEndLoc() const LLVM_READONLY { return Base->getEndLoc(); }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == HLSLOutArgExprClass;
+  }
+
+  // Iterators
+  child_range children() {
+    return child_range((Stmt **)&Base, ((Stmt **)&Writeback) + 1);
+  }
+};
+
 /// Frontend produces RecoveryExprs on semantic errors that prevent creating
 /// other well-formed expressions. E.g. when type-checking of a binary operator
 /// fails, we cannot produce a BinaryOperator expression. Instead, we can 
choose
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h 
b/clang/include/clang/AST/RecursiveASTVisitor.h
index e3c0cb46799f72..27c29099c57cf6 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -4014,6 +4014,9 @@ DEF_TRAVERSE_STMT(OpenACCComputeConstruct,
 DEF_TRAVERSE_STMT(OpenACCLoopConstruct,
                   { TRY_TO(TraverseOpenACCAssociatedStmtConstruct(S)); })
 
+// Traverse HLSL: Out argument expression
+DEF_TRAVERSE_STMT(HLSLOutArgExpr, {})
+
 // FIXME: look at the following tricky-seeming exprs to see if we
 // need to recurse on anything.  These are ones that have methods
 // returning decls or qualtypes or nestednamespecifier -- though I'm
diff --git a/clang/include/clang/AST/TextNodeDumper.h 
b/clang/include/clang/AST/TextNodeDumper.h
index 39dd1f515c9eb3..261853343a0113 100644
--- a/clang/include/clang/AST/TextNodeDumper.h
+++ b/clang/include/clang/AST/TextNodeDumper.h
@@ -407,6 +407,7 @@ class TextNodeDumper
   void
   VisitLifetimeExtendedTemporaryDecl(const LifetimeExtendedTemporaryDecl *D);
   void VisitHLSLBufferDecl(const HLSLBufferDecl *D);
+  void VisitHLSLOutArgExpr(const HLSLOutArgExpr *E);
   void VisitOpenACCConstructStmt(const OpenACCConstructStmt *S);
   void VisitOpenACCLoopConstruct(const OpenACCLoopConstruct *S);
   void VisitEmbedExpr(const EmbedExpr *S);
diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 46d0a66d59c375..6186161e6b182b 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -4613,14 +4613,13 @@ def HLSLGroupSharedAddressSpace : TypeAttr {
   let Documentation = [HLSLGroupSharedAddressSpaceDocs];
 }
 
-def HLSLParamModifier : TypeAttr {
+def HLSLParamModifier : ParameterABIAttr {
   let Spellings = [CustomKeyword<"in">, CustomKeyword<"inout">, 
CustomKeyword<"out">];
   let Accessors = [Accessor<"isIn", [CustomKeyword<"in">]>,
                    Accessor<"isInOut", [CustomKeyword<"inout">]>,
                    Accessor<"isOut", [CustomKeyword<"out">]>,
                    Accessor<"isAnyOut", [CustomKeyword<"out">, 
CustomKeyword<"inout">]>,
                    Accessor<"isAnyIn", [CustomKeyword<"in">, 
CustomKeyword<"inout">]>];
-  let Subjects = SubjectList<[ParmVar]>;
   let Documentation = [HLSLParamQualifierDocs];
   let Args = [DefaultBoolArgument<"MergedSpelling", /*default*/0, /*fake*/1>];
 }
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 581434d33c5c9a..c499ee8ac5906e 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12357,6 +12357,8 @@ def warn_hlsl_availability : Warning<
 def warn_hlsl_availability_unavailable :
   Warning<err_unavailable.Summary>,
   InGroup<HLSLAvailability>, DefaultError;
+def error_hlsl_inout_scalar_extension : Error<"illegal scalar extension cast on argument %0 to %select{|in}1out paramemter">;
+def error_hlsl_inout_lvalue : Error<"cannot bind non-lvalue argument %0 to %select{|in}1out paramemter">;
 
 def err_hlsl_export_not_on_function : Error<
   "export declaration can only be used on functions">;
diff --git a/clang/include/clang/Basic/Specifiers.h 
b/clang/include/clang/Basic/Specifiers.h
index fb11e8212f8b68..0ffd9e06cf3e5e 100644
--- a/clang/include/clang/Basic/Specifiers.h
+++ b/clang/include/clang/Basic/Specifiers.h
@@ -382,6 +382,12 @@ namespace clang {
     /// Swift asynchronous context-pointer ABI treatment.  There can be at
     /// most one parameter on a given function that uses this treatment.
     SwiftAsyncContext,
+
+    // This parameter is a copy-out HLSL parameter.
+    HLSLOut,
+
+    // This parameter is a copy-in/copy-out HLSL parameter.
+    HLSLInOut,
   };
 
   /// Assigned inheritance model for a class in the MS C++ ABI. Must match 
order
diff --git a/clang/include/clang/Basic/StmtNodes.td 
b/clang/include/clang/Basic/StmtNodes.td
index 9bf23fae50a9e7..a80601b1e4a942 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -306,3 +306,6 @@ def OpenACCAssociatedStmtConstruct
     : StmtNode<OpenACCConstructStmt, /*abstract=*/1>;
 def OpenACCComputeConstruct : StmtNode<OpenACCAssociatedStmtConstruct>;
 def OpenACCLoopConstruct : StmtNode<OpenACCAssociatedStmtConstruct>;
+
+// HLSL Constructs.
+def HLSLOutArgExpr : StmtNode<Expr>;
diff --git a/clang/include/clang/Sema/SemaHLSL.h 
b/clang/include/clang/Sema/SemaHLSL.h
index 2ddbee67c414bb..64b565787f3257 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -61,6 +61,8 @@ class SemaHLSL : public SemaBase {
   void handleParamModifierAttr(Decl *D, const ParsedAttr &AL);
 
   bool CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+
+  ExprResult ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg);
 };
 
 } // namespace clang
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index 5dd0ba33f8a9c2..c19d750d30d56d 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1988,6 +1988,9 @@ enum StmtCode {
   // OpenACC Constructs
   STMT_OPENACC_COMPUTE_CONSTRUCT,
   STMT_OPENACC_LOOP_CONSTRUCT,
+
+  // HLSL Constructs
+  EXPR_HLSL_OUT_ARG,
 };
 
 /// The kinds of designators that can occur in a
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index a465cdfcf3c89e..750928fc009280 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -3590,6 +3590,20 @@ bool 
ASTContext::hasSameFunctionTypeIgnoringPtrSizes(QualType T, QualType U) {
                      getFunctionTypeWithoutPtrSizes(U));
 }
 
+QualType ASTContext::getFunctionTypeWithoutParamABIs(QualType T) {
+  if (const auto *Proto = T->getAs<FunctionProtoType>()) {
+    FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo();
+    EPI.ExtParameterInfos = nullptr;
+    return getFunctionType(Proto->getReturnType(), Proto->param_types(), EPI);
+  }
+  return T;
+}
+
+bool ASTContext::hasSameFunctionTypeIgnoringParamABI(QualType T, QualType U) {
+  return hasSameType(T, U) || hasSameType(getFunctionTypeWithoutParamABIs(T),
+                                          getFunctionTypeWithoutParamABIs(U));
+}
+
 void ASTContext::adjustExceptionSpec(
     FunctionDecl *FD, const FunctionProtoType::ExceptionSpecInfo &ESI,
     bool AsWritten) {
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 9d5b8167d0ee62..be12e6e93cc453 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -3631,6 +3631,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx,
   case RequiresExprClass:
   case SYCLUniqueStableNameExprClass:
   case PackIndexingExprClass:
+  case HLSLOutArgExprClass:
     // These never have a side-effect.
     return false;
 
@@ -5318,3 +5319,13 @@ OMPIteratorExpr *OMPIteratorExpr::CreateEmpty(const 
ASTContext &Context,
       alignof(OMPIteratorExpr));
   return new (Mem) OMPIteratorExpr(EmptyShell(), NumIterators);
 }
+
+HLSLOutArgExpr *HLSLOutArgExpr::Create(const ASTContext &C, QualType Ty,
+                                       Expr *Base, bool IsInOut, Expr *WB,
+                                       OpaqueValueExpr *OpV) {
+  return new (C) HLSLOutArgExpr(Ty, Base, WB, OpV, IsInOut);
+}
+
+HLSLOutArgExpr *HLSLOutArgExpr::CreateEmpty(const ASTContext &C) {
+  return new (C) HLSLOutArgExpr(EmptyShell());
+}
diff --git a/clang/lib/AST/ExprClassification.cpp 
b/clang/lib/AST/ExprClassification.cpp
index 6482cb6d39acc6..ebbfaa187263fa 100644
--- a/clang/lib/AST/ExprClassification.cpp
+++ b/clang/lib/AST/ExprClassification.cpp
@@ -148,6 +148,7 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const 
Expr *E) {
   case Expr::ArraySectionExprClass:
   case Expr::OMPArrayShapingExprClass:
   case Expr::OMPIteratorExprClass:
+  case Expr::HLSLOutArgExprClass:
     return Cl::CL_LValue;
 
     // C99 6.5.2.5p5 says that compound literals are lvalues.
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 558e20ed3e4239..c692d47ffd1afa 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -16469,6 +16469,7 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext 
&Ctx) {
   case Expr::CoyieldExprClass:
   case Expr::SYCLUniqueStableNameExprClass:
   case Expr::CXXParenListInitExprClass:
+  case Expr::HLSLOutArgExprClass:
     return ICEDiag(IK_NotICE, E->getBeginLoc());
 
   case Expr::InitListExprClass: {
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index d46d621d4c7d41..1a1c316b90c4e6 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -3507,6 +3507,12 @@ 
CXXNameMangler::mangleExtParameterInfo(FunctionProtoType::ExtParameterInfo PI) {
   case ParameterABI::Ordinary:
     break;
 
+  // HLSL parameter mangling.
+  case ParameterABI::HLSLOut:
+  case ParameterABI::HLSLInOut:
+    mangleVendorQualifier(getParameterABISpelling(PI.getABI()));
+    break;
+
   // All of these start with "swift", so they come before "ns_consumed".
   case ParameterABI::SwiftContext:
   case ParameterABI::SwiftAsyncContext:
@@ -5703,6 +5709,12 @@ void CXXNameMangler::mangleExpression(const Expr *E, 
unsigned Arity,
     Out << "E";
     break;
   }
+  case Expr::HLSLOutArgExprClass: {
+    const auto *OAE = cast<clang::HLSLOutArgExpr>(E);
+    Out << (OAE->isInOut() ? "_inout_" : "_out_");
+    mangleType(E->getType());
+    break;
+  }
   }
 
   if (AsTemplateArg && !IsPrimaryExpr)
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 69e0b763e8ddcc..0a2ac8c16671ad 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -2799,6 +2799,10 @@ void StmtPrinter::VisitAsTypeExpr(AsTypeExpr *Node) {
   OS << ")";
 }
 
+void StmtPrinter::VisitHLSLOutArgExpr(HLSLOutArgExpr *Node) {
+  PrintExpr(Node->getBase());
+}
+
 
//===----------------------------------------------------------------------===//
 // Stmt method implementations
 
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 89d2a422509d81..37812812ee2b37 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2631,6 +2631,10 @@ void StmtProfiler::VisitOpenACCLoopConstruct(const 
OpenACCLoopConstruct *S) {
   P.VisitOpenACCClauseList(S->clauses());
 }
 
+void StmtProfiler::VisitHLSLOutArgExpr(const HLSLOutArgExpr *S) {
+  VisitStmt(S);
+}
+
 void Stmt::Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context,
                    bool Canonical, bool ProfileLambdaExpr) const {
   StmtProfilerWithPointers Profiler(ID, Context, Canonical, ProfileLambdaExpr);
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 5ba9523504258e..ff88f4aec98a53 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -2874,6 +2874,10 @@ void TextNodeDumper::VisitHLSLBufferDecl(const 
HLSLBufferDecl *D) {
   dumpName(D);
 }
 
+void TextNodeDumper::VisitHLSLOutArgExpr(const HLSLOutArgExpr *E) {
+  OS << (E->isInOut() ? " inout" : " out");
+}
+
 void TextNodeDumper::VisitOpenACCConstructStmt(const OpenACCConstructStmt *S) {
   OS << " " << S->getDirectiveKind();
 }
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index ffec3ef9d22692..b88e9b8f7f4715 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -933,6 +933,10 @@ StringRef clang::getParameterABISpelling(ParameterABI ABI) 
{
     return "swift_error_result";
   case ParameterABI::SwiftIndirectResult:
     return "swift_indirect_result";
+  case ParameterABI::HLSLOut:
+    return "out";
+  case ParameterABI::HLSLInOut:
+    return "inout";
   }
   llvm_unreachable("bad parameter ABI kind");
 }
@@ -955,7 +959,17 @@ void TypePrinter::printFunctionProtoAfter(const 
FunctionProtoType *T,
       if (EPI.isNoEscape())
         OS << "__attribute__((noescape)) ";
       auto ABI = EPI.getABI();
-      if (ABI != ParameterABI::Ordinary)
+      if (ABI == ParameterABI::HLSLInOut || ABI == ParameterABI::HLSLOut) {
+        OS << getParameterABISpelling(ABI) << " ";
+        if (Policy.UseHLSLTypes) {
+          // This is a bit of a hack because we _do_ use reference types in the
+          // AST for representing inout and out parameters so that code
+          // generation is sane, but when re-printing these for HLSL we need to
+          // skip the reference.
+          print(T->getParamType(i).getNonReferenceType(), OS, StringRef());
+          continue;
+        }
+      } else if (ABI != ParameterABI::Ordinary)
         OS << "__attribute__((" << getParameterABISpelling(ABI) << ")) ";
 
       print(T->getParamType(i), OS, StringRef());
@@ -2023,10 +2037,6 @@ void TypePrinter::printAttributedAfter(const 
AttributedType *T,
   case attr::ArmMveStrictPolymorphism:
     OS << "__clang_arm_mve_strict_polymorphism";
     break;
-
-  // Nothing to print for this attribute.
-  case attr::HLSLParamModifier:
-    break;
   }
   OS << "))";
 }
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 2f3dd5d01fa6c9..fd1788893b7ac3 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2830,6 +2830,9 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
     }
 
     switch (FI.getExtParameterInfo(ArgNo).getABI()) {
+    case ParameterABI::HLSLOut:
+    case ParameterABI::HLSLInOut:
+      // FIXME: Do this...
     case ParameterABI::Ordinary:
       break;
 
@@ -4148,6 +4151,30 @@ static void emitWriteback(CodeGenFunction &CGF,
   assert(!isProvablyNull(srcAddr.getBasePointer()) &&
          "shouldn't have writeback for provably null argument");
 
+  if (CGF.getLangOpts().HLSL) {
+    if (writeback.CastExpr) {
+      RValue TmpVal = CGF.EmitAnyExprToTemp(writeback.CastExpr);
+      if (TmpVal.isScalar())
+        CGF.EmitStoreThroughLValue(TmpVal, srcLV);
+      else
+        CGF.EmitAggregateStore(srcLV.getPointer(CGF),
+                               TmpVal.getAggregateAddress(), false);
+    } else {
+      if (srcLV.isSimple())
+        CGF.EmitAggregateStore(srcLV.getPointer(CGF), writeback.Temporary,
+                               false);
+      else {
+        llvm::Value *value = CGF.Builder.CreateLoad(writeback.Temporary);
+        RValue TmpVal = RValue::get(value);
+        CGF.EmitStoreThroughLValue(TmpVal, srcLV);
+      }
+    }
+    if (writeback.LifetimeSz)
+      CGF.EmitLifetimeEnd(writeback.LifetimeSz,
+                          writeback.Temporary.getBasePointer());
+    return;
+  }
+
   llvm::BasicBlock *contBB = nullptr;
 
   // If the argument wasn't provably non-null, we need to null check
@@ -4610,6 +4637,9 @@ void CodeGenFunction::EmitCallArgs(
     // Un-reverse the arguments we just evaluated so they match up with the 
LLVM
     // IR function.
     std::reverse(Args.begin() + CallArgsStart, Args.end());
+
+    // Reverse the writebacks to match the MSVC ABI.
+    Args.reverseWritebacks();
   }
 }
 
@@ -4689,6 +4719,32 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, 
const Expr *E,
   assert(type->isReferenceType() == E->isGLValue() &&
          "reference binding to unmaterialized r-value!");
 
+  // Add writeback for HLSLOutArgExpr.
+  if (const HLSLOutArgExpr *OE = dyn_cast<HLSLOutArgExpr>(E)) {
+    LValue LV = EmitLValue(E);
+    llvm::Type *ElTy = ConvertTypeForMem(LV.getType());
+    llvm::Value *Addr, *BaseAddr;
+    if (LV.isExtVectorElt()) {
+      llvm::Constant *VecElts = LV.getExtVectorElts();
+      BaseAddr = LV.getExtVectorAddress().getBasePointer();
+      Addr = Builder.CreateGEP(
+          ElTy, BaseAddr,
+          {Builder.getInt32(0), VecElts->getAggregateElement((unsigned)0)});
+    } else // LV.getAddress() will assert if this is not a simple LValue.
+      Addr = BaseAddr = LV.getAddress().getBasePointer();
+
+    llvm::TypeSize Sz =
+        CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(LV.getType()));
+
+    llvm::Value *LifetimeSize = EmitLifetimeStart(Sz, BaseAddr);
+
+    Address TmpAddr(Addr, ElTy, LV.getAlignment());
+    // TODO-HLSLOutArgExp: Fix me!!!
+    args.addWriteback(EmitLValue(OE->getBase()->IgnoreImpCasts()), TmpAddr,
+                      nullptr, OE->getWriteback(), LifetimeSize);
+    return args.add(RValue::get(TmpAddr, *this), type);
+  }
+
   if (E->isGLValue()) {
     assert(E->getObjectKind() == OK_Ordinary);
     return args.add(EmitReferenceBindingToExpr(E), type);
diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h
index 412b44a8c753aa..3c3fc8f8a8b6d6 100644
--- a/clang/lib/CodeGen/CGCall.h
+++ b/clang/lib/CodeGen/CGCall.h
@@ -285,6 +285,13 @@ class CallArgList : public SmallVector<CallArg, 8> {
 
     /// A value to "use" after the writeback, or null.
     llvm::Value *ToUse;
+
+    /// An Expression representing a cast from the temporary's type to the
+    /// source l-value's type.
+    const Expr *CastExpr;
+
+    // Size for optional lifetime end on the temporary.
+    llvm::Value *LifetimeSz;
   };
 
   struct CallArgCleanup {
@@ -316,8 +323,10 @@ class CallArgList : public SmallVector<CallArg, 8> {
       StackBase = other.StackBase;
   }
 
-  void addWriteback(LValue srcLV, Address temporary, llvm::Value *toUse) {
-    Writeback writeback = {srcLV, temporary, toUse};
+  void addWriteback(LValue srcLV, Address temporary, llvm::Value *toUse,
+                    const Expr *castExpr = nullptr,
+                    llvm::Value *lifetimeSz = nullptr) {
+    Writeback writeback = {srcLV, temporary, toUse, castExpr, lifetimeSz};
     Writebacks.push_back(writeback);
   }
 
@@ -350,6 +359,11 @@ class CallArgList : public SmallVector<CallArg, 8> {
   /// memory.
   bool isUsingInAlloca() const { return StackBase; }
 
+  // Support reversing writebacks for MSVC ABI.
+  void reverseWritebacks() {
+    std::reverse(Writebacks.begin(), Writebacks.end());
+  }
+
 private:
   SmallVector<Writeback, 1> Writebacks;
 
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 5f58a64d8386c3..2c8c31c8bdf529 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1671,6 +1671,8 @@ LValue CodeGenFunction::EmitLValueHelper(const Expr *E,
     return EmitCoyieldLValue(cast<CoyieldExpr>(E));
   case Expr::PackIndexingExprClass:
     return EmitLValue(cast<PackIndexingExpr>(E)->getSelectedExpr());
+  case Expr::HLSLOutArgExprClass:
+    return EmitHLSLOutArgExpr(cast<HLSLOutArgExpr>(E));
   }
 }
 
@@ -5386,6 +5388,29 @@ LValue CodeGenFunction::EmitOpaqueValueLValue(const 
OpaqueValueExpr *e) {
   return getOrCreateOpaqueLValueMapping(e);
 }
 
+LValue CodeGenFunction::BindHLSLOutArgExpr(const HLSLOutArgExpr *E,
+                                           Address OutTemp) {
+  LValue Result = MakeAddrLValue(OutTemp, E->getType());
+  OpaqueValueMappingData::bind(*this, E->getOpaqueValue(), Result);
+  return Result;
+}
+
+LValue CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E) {
+  if (!E->isInOut())
+    return BindHLSLOutArgExpr(E, CreateIRTemp(E->getType()));
+
+  RValue InVal = EmitAnyExprToTemp(E->getBase());
+  if (!InVal.isScalar())
+    return BindHLSLOutArgExpr(E, InVal.getAggregateAddress());
+
+  Address OutTemp = CreateIRTemp(E->getType());
+  llvm::Value *V = InVal.getScalarVal();
+  if (V->getType()->getScalarType()->isIntegerTy(1))
+    V = Builder.CreateZExt(V, ConvertTypeForMem(E->getType()), "frombool");
+  (void)Builder.CreateStore(V, OutTemp);
+  return BindHLSLOutArgExpr(E, OutTemp);
+}
+
 LValue
 CodeGenFunction::getOrCreateOpaqueLValueMapping(const OpaqueValueExpr *e) {
   assert(OpaqueValueMapping::shouldBindAsLValue(e));
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 89cc819c43bb56..6464fbcc95b444 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4293,6 +4293,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   LValue EmitCastLValue(const CastExpr *E);
   LValue EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E);
   LValue EmitOpaqueValueLValue(const OpaqueValueExpr *e);
+  LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E);
+  LValue BindHLSLOutArgExpr(const HLSLOutArgExpr *E, Address OutTemp);
 
   Address EmitExtVectorElementLValue(LValue V);
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index bb30b1e289a1c0..0723d8a4c60985 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -11427,6 +11427,19 @@ static void AnalyzeImplicitConversions(
     return;
   }
 
+  if (auto *OutArgE = dyn_cast<HLSLOutArgExpr>(E)) {
+    // The base expression is only used to initialize the parameter for
+    // arguments to `inout` parameters, so we only traverse down the base
+    // expression for `inout` cases.
+    if (OutArgE->isInOut())
+      WorkList.push_back({OutArgE->getBase(), CC, IsListInit});
+    // In all cases where there is a writeback conversion we should analyze its
+    // conversions.
+    if (OutArgE->getWriteback())
+      WorkList.push_back({OutArgE->getWriteback(), CC, IsListInit});
+    return;
+  }
+
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(E)) {
     // Do a somewhat different check with comparison operators.
     if (BO->isComparisonOp())
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 694a754646f274..fa8e79299a77e0 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -4057,6 +4057,10 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, 
NamedDecl *&OldD, Scope *S,
                                                          NewQType))
       return MergeCompatibleFunctionDecls(New, Old, S, MergeTypeWithOld);
 
+    if (getLangOpts().HLSL && Context.hasSameFunctionTypeIgnoringParamABI(
+                                  OldQTypeForComparison, NewQType))
+      return MergeCompatibleFunctionDecls(New, Old, S, MergeTypeWithOld);
+
     // If the types are imprecise (due to dependent constructs in friends or
     // local extern declarations), it's OK if they differ. We'll check again
     // during instantiation.
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp 
b/clang/lib/Sema/SemaExceptionSpec.cpp
index 427ffd9061ef3b..e2368bd9954ed4 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1395,6 +1395,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
   case Expr::EmbedExprClass:
   case Expr::ConceptSpecializationExprClass:
   case Expr::RequiresExprClass:
+  case Expr::HLSLOutArgExprClass:
     // These expressions can never throw.
     return CT_Cannot;
 
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 74c0e017059055..698b6df90baeec 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -52,6 +52,7 @@
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaFixItUtils.h"
+#include "clang/Sema/SemaHLSL.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/SemaObjC.h"
 #include "clang/Sema/SemaOpenMP.h"
@@ -5915,6 +5916,16 @@ bool Sema::GatherArgumentsForCall(SourceLocation 
CallLoc, FunctionDecl *FDecl,
           ProtoArgType->isBlockPointerType())
         if (auto *BE = dyn_cast<BlockExpr>(Arg->IgnoreParenNoopCasts(Context)))
           BE->getBlockDecl()->setDoesNotEscape();
+      // During template instantiation it is possible that we already created
+      // the HLSLOutArgExpr if it was produced during tree transformation.
+      if ((Proto->getExtParameterInfo(i).getABI() == ParameterABI::HLSLOut ||
+           Proto->getExtParameterInfo(i).getABI() == ParameterABI::HLSLInOut) 
&&
+          !isa<HLSLOutArgExpr>(Arg)) {
+        ExprResult ArgExpr = HLSL().ActOnOutParamExpr(Param, Arg);
+        if (ArgExpr.isInvalid())
+          return true;
+        Arg = ArgExpr.getAs<Expr>();
+      }
 
       InitializedEntity Entity =
           Param ? InitializedEntity::InitializeParameter(Context, Param,
@@ -17008,10 +17019,10 @@ Sema::VerifyIntegerConstantExpression(Expr *E, 
llvm::APSInt *Result,
     if (!isa<ConstantExpr>(E))
       E = Result ? ConstantExpr::Create(Context, E, APValue(*Result))
                  : ConstantExpr::Create(Context, E);
-    
+
     if (Notes.empty())
       return E;
-    
+
     // If our only note is the usual "invalid subexpression" note, just point
     // the caret at its location rather than producing an essentially
     // redundant note.
@@ -17020,7 +17031,7 @@ Sema::VerifyIntegerConstantExpression(Expr *E, 
llvm::APSInt *Result,
       DiagLoc = Notes[0].first;
       Notes.clear();
     }
-    
+
     if (getLangOpts().CPlusPlus) {
       if (!Diagnoser.Suppress) {
         Diagnoser.diagnoseNotICE(*this, DiagLoc) << E->getSourceRange();
@@ -17033,7 +17044,7 @@ Sema::VerifyIntegerConstantExpression(Expr *E, 
llvm::APSInt *Result,
     Diagnoser.diagnoseFold(*this, DiagLoc) << E->getSourceRange();
     for (const PartialDiagnosticAt &Note : Notes)
       Diag(Note.first, Note.second);
-    
+
     return E;
   }
 
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 9940bc5b4a606a..3118fe577d7747 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -356,7 +356,7 @@ static bool isLegalTypeForHLSLSV_DispatchThreadID(QualType 
T) {
   return true;
 }
 
-void SemaHLSL::handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL) {  
+void SemaHLSL::handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL) {
   auto *VD = cast<ValueDecl>(D);
   if (!isLegalTypeForHLSLSV_DispatchThreadID(VD->getType())) {
     Diag(AL.getLoc(), diag::err_hlsl_attr_invalid_type)
@@ -1121,3 +1121,61 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall) {
   }
   return false;
 }
+
+ExprResult SemaHLSL::ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg) {
+  assert(Param->hasAttr<HLSLParamModifierAttr>() &&
+         "We should not get here without a parameter modifier expression");
+  const auto *Attr = Param->getAttr<HLSLParamModifierAttr>();
+  if (Attr->getABI() == ParameterABI::Ordinary)
+    return ExprResult(Arg); // Not an output parameter: pass Arg through as-is.
+
+  bool IsInOut = Attr->getABI() == ParameterABI::HLSLInOut;
+  if (!Arg->isLValue()) {
+    SemaRef.Diag(Arg->getBeginLoc(), diag::error_hlsl_inout_lvalue)
+        << Arg << (IsInOut ? 1 : 0);
+    return ExprError();
+  }
+
+  ASTContext &Ctx = SemaRef.getASTContext();
+
+  QualType Ty = Param->getType().getNonLValueExprType(Ctx);
+
+  // The writeback needs the inverse of the argument conversion. HLSL converts
+  // scalars to vectors implicitly but not the reverse, so reject `out`/`inout`
+  // arguments whose scalar-ness differs from the parameter's.
+  if (Arg->getType()->isScalarType() != Ty->isScalarType()) {
+    SemaRef.Diag(Arg->getBeginLoc(), diag::error_hlsl_inout_scalar_extension)
+        << Arg << (IsInOut ? 1 : 0);
+    return ExprError();
+  }
+
+  bool RequiresConversion =
+      Ty.getUnqualifiedType() != Arg->getType().getUnqualifiedType();
+
+  // Qualifiers are ignored since the argument is copied. On a type mismatch,
+  // convert argument -> temporary, then temporary -> argument type (writeback).
+  if (RequiresConversion) {
+    ExprResult Res =
+        SemaRef.PerformImplicitConversion(Arg, Ty, Sema::AA_Passing);
+    if (Res.isInvalid())
+      return ExprError();
+    Expr *Base = Res.get();
+    // After the cast, drop the reference type when creating the exprs.
+    Ty = Ty.getNonLValueExprType(Ctx);
+    auto *OpV = new (Ctx)
+        OpaqueValueExpr(Param->getBeginLoc(), Ty, VK_LValue, OK_Ordinary, 
Base);
+    Res = SemaRef.PerformImplicitConversion(OpV, Arg->getType(),
+                                            Sema::AA_Passing);
+    if (Res.isInvalid())
+      return ExprError();
+    Expr *Writeback = Res.get();
+    auto *OutExpr =
+        HLSLOutArgExpr::Create(Ctx, Ty, Base, IsInOut, Writeback, OpV);
+
+    return ExprResult(OutExpr);
+  }
+  // Same type: the opaque value serves as both the temporary and the writeback.
+  auto *OpV = new (Ctx)
+      OpaqueValueExpr(Param->getBeginLoc(), Ty, VK_LValue, OK_Ordinary, Arg);
+  return ExprResult(HLSLOutArgExpr::Create(Ctx, Ty, Arg, IsInOut, OpV, OpV));
+}
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index c5f56ac62b458c..057ecd323cb956 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -7007,6 +7007,10 @@ void Sema::AddOverloadCandidate(
       // (13.3.3.1) that converts that argument to the corresponding
       // parameter of F.
       QualType ParamType = Proto->getParamType(ArgIdx);
+      auto ParamABI = Proto->getExtParameterInfo(ArgIdx).getABI();
+      if (ParamABI == ParameterABI::HLSLOut ||
+          ParamABI == ParameterABI::HLSLInOut)
+        ParamType = ParamType.getNonReferenceType();
       Candidate.Conversions[ConvIdx] = TryCopyInitialization(
           *this, Args[ArgIdx], ParamType, SuppressUserConversions,
           /*InOverloadResolution=*/true,
diff --git a/clang/lib/Sema/SemaSwift.cpp b/clang/lib/Sema/SemaSwift.cpp
index bf56ae8ac76d57..2eebce74b5e2f8 100644
--- a/clang/lib/Sema/SemaSwift.cpp
+++ b/clang/lib/Sema/SemaSwift.cpp
@@ -724,6 +724,9 @@ void SemaSwift::AddParameterABIAttr(Decl *D, const 
AttributeCommonInfo &CI,
   }
 
   switch (abi) {
+  case ParameterABI::HLSLOut:
+  case ParameterABI::HLSLInOut:
+    llvm_unreachable("explicit attribute for non-swift parameter ABI?");
   case ParameterABI::Ordinary:
     llvm_unreachable("explicit attribute for ordinary parameter ABI?");
 
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 6fa39cdccef2b9..699154d49cacb4 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -2565,6 +2565,8 @@ static void checkExtParameterInfos(Sema &S, 
ArrayRef<QualType> paramTypes,
     switch (EPI.ExtParameterInfos[paramIndex].getABI()) {
     // Nothing interesting to check for orindary-ABI parameters.
     case ParameterABI::Ordinary:
+    case ParameterABI::HLSLOut:
+    case ParameterABI::HLSLInOut:
       continue;
 
     // swift_indirect_result parameters must be a prefix of the function
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 4d68ebf0cc4524..edc724d26269cb 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -16509,6 +16509,35 @@ 
TreeTransform<Derived>::TransformCapturedStmt(CapturedStmt *S) {
   return getSema().ActOnCapturedRegionEnd(Body.get());
 }
 
+/// Rebuild an HLSL output argument expression around its transformed base.
+template <typename Derived>
+ExprResult TreeTransform<Derived>::TransformHLSLOutArgExpr(HLSLOutArgExpr *E) {
+  ExprResult Res = getDerived().TransformExpr(E->getBase());
+  if (Res.isInvalid())
+    return ExprError();
+
+  if (!getDerived().AlwaysRebuild() && Res.get() == E->getBase())
+    return E;
+
+  Expr *BaseExpr = Res.get();
+  auto *OpV = new (getSema().Context) OpaqueValueExpr(
+      E->getBeginLoc(), E->getType(), VK_LValue, OK_Ordinary, BaseExpr);
+  if (isa<OpaqueValueExpr>(E->getWriteback()))
+    return HLSLOutArgExpr::Create(getSema().Context, E->getType(), BaseExpr,
+                                  E->isInOut(), OpV, OpV);
+  // Otherwise the writeback is a conversion sequence over the opaque value.
+  // Regenerate it targeting the original writeback's type (the argument type,
+  // not the parameter type). None of these types are dependent and the sequence
+  // was already built once for the pattern, so this is not expected to fail.
+  Res = getSema().PerformImplicitConversion(OpV, E->getWriteback()->getType(),
+                                            Sema::AA_Passing);
+  if (Res.isInvalid())
+    return ExprError();
+
+  return HLSLOutArgExpr::Create(getSema().Context, E->getType(), BaseExpr,
+                                E->isInOut(), Res.get(), OpV);
+}
+
 } // end namespace clang
 
 #endif // LLVM_CLANG_LIB_SEMA_TREETRANSFORM_H
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp 
b/clang/lib/Serialization/ASTReaderStmt.cpp
index e1cba9e612be3d..e348ba441f08c5 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2832,6 +2832,22 @@ void 
ASTStmtReader::VisitOpenACCLoopConstruct(OpenACCLoopConstruct *S) {
   VisitOpenACCAssociatedStmtConstruct(S);
 }
 
+//===----------------------------------------------------------------------===//
+// HLSL Constructs/Directives.
+//===----------------------------------------------------------------------===//
+
+void ASTStmtReader::VisitHLSLOutArgExpr(HLSLOutArgExpr *S) {
+  VisitExpr(S);
+  S->Base = Record.readSubExpr();
+  S->Writeback = Record.readSubExpr();
+  S->IsInOut = Record.readBool();
+  // Not serialized: the OVE is the writeback itself or the operand of its casts.
+  Stmt *OVE = S->getWriteback();
+  while (!isa<OpaqueValueExpr>(OVE))
+    OVE = *OVE->child_begin();
+  S->OpaqueVal = cast<OpaqueValueExpr>(OVE);
+}
+
 
//===----------------------------------------------------------------------===//
 // ASTReader Implementation
 
//===----------------------------------------------------------------------===//
@@ -4280,13 +4296,17 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       S = OpenACCLoopConstruct::CreateEmpty(Context, NumClauses);
       break;
     }
-    case EXPR_REQUIRES:
+    case EXPR_REQUIRES: {
       unsigned numLocalParameters = Record[ASTStmtReader::NumExprFields];
       unsigned numRequirement = Record[ASTStmtReader::NumExprFields + 1];
       S = RequiresExpr::Create(Context, Empty, numLocalParameters,
                                numRequirement);
       break;
     }
+    case EXPR_HLSL_OUT_ARG:
+      S = HLSLOutArgExpr::CreateEmpty(Context);
+      break;
+    }
 
     // We hit a STMT_STOP, so we're done with this expression.
     if (Finished)
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp 
b/clang/lib/Serialization/ASTWriterStmt.cpp
index ec667b58337ff5..e43ced0b59ebdc 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2903,6 +2903,18 @@ void 
ASTStmtWriter::VisitOpenACCLoopConstruct(OpenACCLoopConstruct *S) {
   Code = serialization::STMT_OPENACC_LOOP_CONSTRUCT;
 }
 
+//===----------------------------------------------------------------------===//
+// HLSL Constructs/Directives.
+//===----------------------------------------------------------------------===//
+
+void ASTStmtWriter::VisitHLSLOutArgExpr(HLSLOutArgExpr *S) {
+  VisitExpr(S);
+  Record.AddStmt(S->getBase());      // ASTStmtReader reads these three fields
+  Record.AddStmt(S->getWriteback()); // back in this order and recovers the
+  Record.writeBool(S->isInOut());    // opaque value from the writeback.
+  Code = serialization::EXPR_HLSL_OUT_ARG;
+}
+
 
//===----------------------------------------------------------------------===//
 // ASTWriter Implementation
 
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp 
b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 62a240ecbc6003..c664b01ece8b7b 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1826,7 +1826,8 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     case Stmt::OpenACCComputeConstructClass:
     case Stmt::OpenACCLoopConstructClass:
     case Stmt::OMPUnrollDirectiveClass:
-    case Stmt::OMPMetaDirectiveClass: {
+    case Stmt::OMPMetaDirectiveClass:
+    case Stmt::HLSLOutArgExprClass: {
       const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState());
       Engine.addAbortedBlock(node, currBldrCtx->getBlock());
       break;
diff --git a/clang/test/AST/HLSL/OutArgExpr.hlsl 
b/clang/test/AST/HLSL/OutArgExpr.hlsl
new file mode 100644
index 00000000000000..8ad77bd77a88b3
--- /dev/null
+++ b/clang/test/AST/HLSL/OutArgExpr.hlsl
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-pch 
-finclude-default-header -o %t.pch %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute 
-finclude-default-header -include-pch %t.pch %s -ast-dump | FileCheck 
--check-prefix=AST %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute 
-finclude-default-header -include-pch %t.pch %s -ast-print | FileCheck %s
+
+
+#ifndef TEST_HLSL
+#define TEST_HLSL
+
+RWBuffer<float> Buf;
+
+// CHECK: void trunc_Param(inout int &X) {
+
+// AST: FunctionDecl {{.*}} used trunc_Param 'void (inout int)'
+// AST-NEXT: ParmVarDecl {{.*}} X 'int &'
+// AST-NEXT: HLSLParamModifierAttr {{.*}} inout
+
+void trunc_Param(inout int X) {}
+
+// CHECK: void zero(out int &Z) {
+// CHECK-NEXT: Z = 0;
+
+// AST: FunctionDecl {{.*}} zero 'void (out int)'
+// AST-NEXT: ParmVarDecl {{.*}} used Z 'int &'
+// AST-NEXT: HLSLParamModifierAttr {{.*}} out
+void zero(out int Z) { Z = 0; }
+
+// AST: FunctionDecl {{.*}} imported used fn 'void (uint)'
+// AST: CallExpr {{.*}} 'void'
+// AST-NEXT: ImplicitCastExpr {{.*}} 'void (*)(inout int)' 
<FunctionToPointerDecay>
+// AST-NEXT: DeclRefExpr {{.*}} 'void (inout int)' lvalue Function
+// AST-NEXT: HLSLOutArgExpr {{.*}} 'int' lvalue inout
+// AST-NEXT: ImplicitCastExpr [[BaseCast:0x[0-9a-fA-F]+]] <{{.*}}> 'int' 
<FloatingToIntegral>
+// AST-NEXT: ImplicitCastExpr {{.*}}  'float' <LValueToRValue>
+// AST-NEXT: CXXOperatorCallExpr {{.*}} 'float' lvalue '[]'
+// AST-NEXT: ImplicitCastExpr {{.*}} 'float &(*)(unsigned int)' 
<FunctionToPointerDecay>
+// AST-NEXT: DeclRefExpr {{.*}} 'float &(unsigned int)' lvalue CXXMethod 
{{.*}} 'operator[]' 'float &(unsigned int)'
+// AST-NEXT: DeclRefExpr {{.*}}  'RWBuffer<float>':'hlsl::RWBuffer<float>' 
lvalue Var
+// AST-NEXT: ImplicitCastExpr {{.*}} 'uint':'unsigned int' <LValueToRValue>
+// AST-NEXT: DeclRefExpr {{.*}} 'uint':'unsigned int' lvalue ParmVar {{.*}} 
'GI' 'uint':'unsigned int'
+// AST-NEXT: ImplicitCastExpr {{.*}} 'float' <IntegralToFloating>
+// AST-NEXT: ImplicitCastExpr {{.*}} 'int' <LValueToRValue>
+// AST-NEXT: OpaqueValueExpr {{.*}} 'int' lvalue
+// AST-NEXT: ImplicitCastExpr [[BaseCast]] <{{.*}}> 'int' <FloatingToIntegral>
+
+// CHECK: void fn(uint GI) {
+// CHECK:     trunc_Param(Buf[GI]);
+void fn(uint GI) {
+  trunc_Param(Buf[GI]);
+}
+
+#else
+
+// AST: FunctionDecl {{.*}} main 'void (uint)'
+// AST: CallExpr {{.*}} 'void'
+// AST-NEXT: ImplicitCastExpr {{.*}} 'void (*)(out int)' 
<FunctionToPointerDecay>
+// AST-NEXT: DeclRefExpr {{.*}} 'void (out int)' lvalue Function {{.*}} 'zero' 
'void (out int)'
+// AST-NEXT: HLSLOutArgExpr {{.*}} 'int' lvalue out
+// AST-NEXT: DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'I' 'int'
+[numthreads(8,1,1)]
+void main(uint GI : SV_GroupIndex) {
+  int I;
+  zero(I);
+  fn(GI);
+}
+#endif // TEST_HLSL
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
new file mode 100644
index 00000000000000..8ed02bb4bbb76a
--- /dev/null
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -disable-llvm-passes 
-emit-llvm -finclude-default-header -o - %s | FileCheck %s 
--check-prefixes=CHECK,ALL
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -O3 -emit-llvm 
-finclude-default-header -o - %s | FileCheck %s --check-prefixes=OPT,ALL
+
+// Case 1: Simple floating integral conversion.
+// In this test case a float value is passed to an inout parameter taking an
+// integer. It is converted to an integer on call and converted back after the
+// function.
+void trunc_Param(inout int X) {}
+
+// ALL-LABEL: define noundef float {{.*}}case1
+// CHECK: [[F:%.*]] = alloca float
+// CHECK: [[ArgTmp:%.*]] = alloca i32
+// CHECK: [[FVal:%.*]] = load float, ptr {{.*}}
+// CHECK: [[IVal:%.*]] = fptosi float [[FVal]] to i32
+// CHECK: store i32 [[IVal]], ptr [[ArgTmp]]
+// CHECK: call void {{.*}}trunc_Param{{.*}}(ptr noundef nonnull align 4 
dereferenceable(4) [[ArgTmp]])
+// CHECK: [[IRet:%.*]] = load i32, ptr [[ArgTmp]]
+// CHECK: [[FRet:%.*]] = sitofp i32 [[IRet]] to float
+// CHECK: store float [[FRet]], ptr [[F]]
+// OPT: [[IVal:%.*]] = fptosi float {{.*}} to i32
+// OPT: [[FVal:%.*]] = sitofp i32 [[IVal]] to float
+// OPT: ret float [[FVal]]
+export float case1(float F) {
+  trunc_Param(F);
+  return F;
+}
+
+// Case 2: Uninitialized `out` parameters.
+// `out` parameters are not pre-initialized by the caller, so they are
+// uninitialized in the function. If they are not initialized before the
+// function returns the value is undefined.
+void undef(out int Z) { }
+
+// ALL-LABEL: define noundef i32 {{.*}}case2
+// CHECK: [[V:%.*]] = alloca i32
+// CHECK: [[ArgTmp:%.*]] = alloca i32
+// CHECK-NOT: store {{.*}}, ptr [[ArgTmp]]
+// CHECK: call void {{.*}}unde{{.*}}(ptr noundef nonnull align 4 
dereferenceable(4) [[ArgTmp]])
+// CHECK-NOT: store {{.*}}, ptr [[ArgTmp]]
+// CHECK: [[Res:%.*]] = load i32, ptr [[ArgTmp]]
+// CHECK: store i32 [[Res]], ptr [[V]], align 4
+// OPT: ret i32 undef
+export int case2() {
+  int V;
+  undef(V);
+  return V;
+}
+
+// Case 3: Simple initialized `out` parameter.
+// This test should verify that an out parameter value is written to as 
expected.
+void zero(out int Z) { Z = 0; }
+
+// ALL-LABEL: define noundef i32 {{.*}}case3
+// CHECK: [[V:%.*]] = alloca i32
+// CHECK: [[ArgTmp:%.*]] = alloca i32
+// CHECK-NOT: store {{.*}}, ptr [[ArgTmp]]
+// CHECK: call void {{.*}}zero{{.*}}(ptr noundef nonnull align 4 
dereferenceable(4) [[ArgTmp]])
+// CHECK-NOT: store {{.*}}, ptr [[ArgTmp]]
+// CHECK: [[Res:%.*]] = load i32, ptr [[ArgTmp]]
+// CHECK: store i32 [[Res]], ptr [[V]], align 4
+// OPT: ret i32 0
+export int case3() {
+  int V;
+  zero(V);
+  return V;
+}
+
+// Case 4: Vector swizzle arguments.
+// Vector swizzles in HLSL produce lvalues, so they can be used as arguments to
+// inout parameters and the swizzle is reversed on writeback.
+void funky(inout int3 X) {
+  X.x += 1;
+  X.y += 2;
+  X.z += 3;
+}
+
+// ALL-LABEL: define noundef <3 x i32> {{.*}}case4
+
+// This block initializes V = 0.xxx.
+// CHECK:  [[V:%.*]] = alloca <3 x i32>
+// CHECK:  [[ArgTmp:%.*]] = alloca <3 x i32>
+// CHECK:  store <1 x i32> zeroinitializer, ptr [[ZeroPtr:%.*]]
+// CHECK:  [[ZeroV1:%.*]] = load <1 x i32>, ptr [[ZeroPtr]]
+// CHECK:  [[ZeroV3:%.*]] = shufflevector <1 x i32> [[ZeroV1]], <1 x i32> 
poison, <3 x i32> zeroinitializer
+// CHECK:  store <3 x i32> [[ZeroV3]], ptr [[V]]
+
+// Shuffle the vector to the temporary.
+// CHECK:  [[VVal:%.*]] = load <3 x i32>, ptr [[V]]
+// CHECK:  [[Vyzx:%.*]] = shufflevector <3 x i32> [[VVal]], <3 x i32> poison, 
<3 x i32> <i32 1, i32 2, i32 0>
+// CHECK:  store <3 x i32> [[Vyzx]], ptr [[ArgTmp]]
+
+// Call the function with the temporary.
+// CHECK: call void {{.*}}funky{{.*}}(ptr noundef nonnull align 16 
dereferenceable(16) [[ArgTmp]])
+
+// Shuffle it back.
+// CHECK:  [[RetVal:%.*]] = load <3 x i32>, ptr [[ArgTmp]]
+// CHECK:  [[Vxyz:%.*]] = shufflevector <3 x i32> [[RetVal]], <3 x i32> 
poison, <3 x i32> <i32 2, i32 0, i32 1>
+// CHECK:  store <3 x i32> [[Vxyz]], ptr [[V]]
+
+// OPT: ret <3 x i32> <i32 3, i32 1, i32 2>
+export int3 case4() {
+  int3 V = 0.xxx;
+  funky(V.yzx);
+  return V;
+}
+
+
+// Case 5: Straightforward inout of a scalar value.
+void increment(inout int I) {
+  I += 1;
+}
+
+// ALL-LABEL: define noundef i32 {{.*}}case5
+
+// CHECK: [[I:%.*]] = alloca i32
+// CHECK: [[ArgTmp:%.*]] = alloca i32
+// CHECK: store i32 4, ptr [[I]]
+// CHECK: [[IInit:%.*]] = load i32, ptr [[I]]
+// CHECK: store i32 [[IInit]], ptr [[ArgTmp]], align 4
+// CHECK: call void {{.*}}increment{{.*}}(ptr noundef nonnull align 4 
dereferenceable(4) [[ArgTmp]])
+// CHECK: [[RetVal:%.*]] = load i32, ptr [[ArgTmp]]
+// CHECK: store i32 [[RetVal]], ptr [[I]], align 4
+// OPT: ret i32 5
+export int case5() {
+  int I = 4;
+  increment(I);
+  return I;
+}
diff --git a/clang/test/SemaHLSL/Language/OutputParameters.hlsl 
b/clang/test/SemaHLSL/Language/OutputParameters.hlsl
new file mode 100644
index 00000000000000..44358a2fbca24b
--- /dev/null
+++ b/clang/test/SemaHLSL/Language/OutputParameters.hlsl
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute 
-finclude-default-header -verify -Wdouble-promotion -Wconversion %s
+
+void OutVecFn(out float3) {}
+void InOutVecFn(inout float3) {}
+
+// Case 1: Calling out and inout parameters with types that cannot be
+// back-converted. In HLSL 2021 and earlier this only occurs when passing 
scalar
+// arguments to vector parameters because scalar->vector conversion is 
implicit,
+// but vector->scalar is not.
+void case1() {
+  float f;
+  int i;
+  OutVecFn(f); // expected-error{{illegal scalar extension cast on argument f 
to out paramemter}}
+  InOutVecFn(f); // expected-error{{illegal scalar extension cast on argument 
f to inout paramemter}}
+
+  OutVecFn(i); // expected-error{{illegal scalar extension cast on argument i 
to out paramemter}}
+  InOutVecFn(i); // expected-error{{illegal scalar extension cast on argument 
i to inout paramemter}}
+}
+
+// Case 2: Conversion warnings on argument initialization. Clang generates
+// implicit conversion warnings only on the writeback conversion for `out`
+// parameters since the parameter is not initialized from the argument. Clang
+// generates implicit conversion warnings on both the parameter initialization
+// and the writeback for `inout` parameters since the parameter is both copied
+// in and out of the function.
+
+void OutFloat(out float) {}
+void InOutFloat(inout float) {}
+
+void case2() {
+  double f;
+  OutFloat(f); // expected-warning{{implicit conversion increases 
floating-point precision: 'float' to 'double'}}
+  InOutFloat(f); // expected-warning{{implicit conversion increases 
floating-point precision: 'float' to 'double'}} expected-warning{{implicit 
conversion loses floating-point precision: 'double' to 'float'}}
+}
diff --git a/clang/test/SemaHLSL/Language/TemplateOutArg.hlsl 
b/clang/test/SemaHLSL/Language/TemplateOutArg.hlsl
new file mode 100644
index 00000000000000..0416806b569522
--- /dev/null
+++ b/clang/test/SemaHLSL/Language/TemplateOutArg.hlsl
@@ -0,0 +1,169 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute 
-finclude-default-header %s -ast-dump | FileCheck %s
+
+// Case 1: Template declaration with a call to an inout or out argument that is
+// resolved based on the template parameter. For this case the template decl
+// should have an UnresolvedLookupExpr for the call, and the HLSLOutArgExpr is
+// built during call resolution.
+
+// CHECK: FunctionDecl {{.*}} used fn 'void (inout int)'
+void fn(inout int I) {
+  I += 1;
+}
+
+// CHECK: FunctionDecl {{.*}} used fn 'void (out double)'
+void fn(out double F) {
+  F = 1.5;
+}
+
+// CHECK-LABEL: FunctionTemplateDecl {{.*}} wrapper
+// CHECK-NEXT: TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 0 
T
+
+// Verify that the template has an unresolved call.
+// CHECK-NEXT: FunctionDecl {{.*}} wrapper 'T (T)'
+// CHECK-NEXT: ParmVarDecl {{.*}} referenced V 'T'
+// CHECK: CallExpr {{.*}} '<dependent type>'
+// CHECK: UnresolvedLookupExpr {{.*}} '<overloaded function type>' lvalue 
(ADL) = 'fn'
+
+// Verify that the int instantiation resolves an inout argument expression.
+
+// CHECK-LABEL: FunctionDecl {{.*}} used wrapper 'int (int)' 
implicit_instantiation
+// CHECK: CallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(inout int)' 
<FunctionToPointerDecay>
+// CHECK-NEXT:   DeclRefExpr {{.*}} 'void (inout int)' lvalue Function {{.*}} 
'fn' 'void (inout int)'
+// CHECK-NEXT: HLSLOutArgExpr {{.*}} 'int' lvalue inout
+// CHECK-NEXT:   ImplicitCastExpr [[BaseExpr:0x[0-9a-fA-F]+]] {{.*}} 'int' 
<LValueToRValue>
+// CHECK-NEXT:     DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'V' 'int'
+// CHECK-NEXT:   ImplicitCastExpr {{.*}} <col:6> 'int' <LValueToRValue>
+// CHECK-NEXT:     OpaqueValueExpr {{.*}} <col:6> 'int' lvalue
+// CHECK-NEXT:       ImplicitCastExpr [[BaseExpr]] {{.*}} 'int' 
<LValueToRValue>
+
+// Verify that the float instantiation has an out argument expression
+// containing casts to and from double.
+
+// CHECK-LABEL: FunctionDecl {{.*}} used wrapper 'float (float)' 
implicit_instantiation
+// CHECK: CallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(out double)' 
<FunctionToPointerDecay>
+// CHECK-NEXT:   DeclRefExpr {{.*}}'void (out double)' lvalue Function {{.*}} 
'fn' 'void (out double)'
+// CHECK-NEXT: HLSLOutArgExpr {{.*}} 'double' lvalue out
+// CHECK-NEXT: ImplicitCastExpr [[BaseExpr:0x[0-9a-fA-F]+]] {{.*}} 'double'
+// CHECK-NEXT:   ImplicitCastExpr {{.*}} 'float' <LValueToRValue>
+// CHECK-NEXT:     DeclRefExpr {{.*}}'float' lvalue ParmVar {{.*}} 'V' 'float'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' <FloatingCast>
+// CHECK-NEXT:   ImplicitCastExpr {{.*}} 'double' <LValueToRValue>
+// CHECK-NEXT:     OpaqueValueExpr {{.*}} 'double' lvalue
+// CHECK-NEXT:       ImplicitCastExpr [[BaseExpr]] {{.*}} 'double' 
<FloatingCast>
+
+// Verify that the double instantiation is just an out expression.
+
+// CHECK-LABEL: FunctionDecl {{.*}} used wrapper 'double (double)' 
implicit_instantiation
+// CHECK: CallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(out double)' 
<FunctionToPointerDecay>
+// CHECK-NEXT:   DeclRefExpr {{.*}}'void (out double)' lvalue Function {{.*}} 
'fn' 'void (out double)'
+// CHECK-NEXT: HLSLOutArgExpr {{.*}} 'double' lvalue out
+// CHECK-NEXT: ImplicitCastExpr [[BaseExpr:0x[0-9a-fA-F]+]] {{.*}} 'double' 
<LValueToRValue>
+// CHECK-NEXT:   DeclRefExpr {{.*}}'double' lvalue ParmVar {{.*}} 'V' 'double'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'double' <LValueToRValue>
+// CHECK-NEXT:   OpaqueValueExpr {{.*}} 'double' lvalue
+// CHECK-NEXT:     ImplicitCastExpr [[BaseExpr]] {{.*}} 'double' 
<LValueToRValue>
+
+template <typename T>
+T wrapper(T V) {
+  fn(V);
+  return V;
+}
+
+// Case 2: Verify that the parameter modifier attribute is instantiated with 
the
+// template (this one is a gimme).
+
+// CHECK-LABEL: FunctionTemplateDecl {{.*}} fizz
+
+// Check the pattern decl.
+// CHECK: FunctionDecl {{.*}} fizz 'void (inout T)'
+// CHECK-NEXT: ParmVarDecl {{.*}} referenced V 'T'
+// CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout
+
+// Check the 3 instantiations (int, float, & double).
+
+// CHECK-LABEL: FunctionDecl {{.*}} used fizz 'void (inout int)' 
implicit_instantiation
+// CHECK: ParmVarDecl {{.*}} used V 'int &'
+// CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout
+
+// CHECK-LABEL: FunctionDecl {{.*}} used fizz 'void (inout float)' 
implicit_instantiation
+// CHECK: ParmVarDecl {{.*}} used V 'float &'
+// CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout
+
+// CHECK-LABEL: FunctionDecl {{.*}} used fizz 'void (inout double)' 
implicit_instantiation
+// CHECK: ParmVarDecl {{.*}} used V 'double &'
+// CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout
+template <typename T>
+void fizz(inout T V) {
+  V += 2;
+}
+
+// Case 3: Verify that HLSLOutArgExpr nodes which are present in the template
+// are correctly instantiated into the instantiation.
+
+// First we check that the AST node is in the template.
+
+// CHECK-LABEL: FunctionTemplateDecl {{.*}} buzz
+
+// CHECK: FunctionDecl {{.*}} buzz 'T (int, T)'
+// CHECK: CallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(inout int)' 
<FunctionToPointerDecay>
+// CHECK-NEXT:   DeclRefExpr {{.*}} 'void (inout int)' lvalue Function {{.*}} 
'fn' 'void (inout int)'
+// CHECK-NEXT: HLSLOutArgExpr {{.*}} 'int' lvalue inout
+// CHECK-NEXT:   DeclRefExpr [[BaseExpr:0x[0-9a-fA-F]+]] {{.*}} 'int' lvalue 
ParmVar {{.*}} 'X' 'int'
+// CHECK-NEXT:   OpaqueValueExpr {{.*}} 'int' lvalue
+// CHECK-NEXT:     DeclRefExpr [[BaseExpr]] {{.*}} 'int' lvalue ParmVar {{.*}} 
'X' 'int'
+
+// CHECK-LABEL: FunctionDecl {{.*}} used buzz 'int (int, int)' 
implicit_instantiation
+// CHECK: CallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(inout int)' 
<FunctionToPointerDecay>
+// CHECK-NEXT:   DeclRefExpr {{.*}} 'void (inout int)' lvalue Function {{.*}} 
'fn' 'void (inout int)'
+// CHECK-NEXT: HLSLOutArgExpr {{.*}} 'int' lvalue inout
+// CHECK-NEXT:   DeclRefExpr [[BaseExpr:0x[0-9a-fA-F]+]] {{.*}} 'int' lvalue 
ParmVar {{.*}} 'X' 'int'
+// CHECK-NEXT:   OpaqueValueExpr {{.*}} 'int' lvalue
+// CHECK-NEXT:     DeclRefExpr [[BaseExpr]] {{.*}} 'int' lvalue ParmVar {{.*}} 
'X' 'int'
+
+// CHECK-LABEL: FunctionDecl {{.*}} used buzz 'float (int, float)' 
implicit_instantiation
+// CHECK: CallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(inout int)' 
<FunctionToPointerDecay>
+// CHECK-NEXT:   DeclRefExpr {{.*}} 'void (inout int)' lvalue Function {{.*}} 
'fn' 'void (inout int)'
+// CHECK-NEXT: HLSLOutArgExpr {{.*}} 'int' lvalue inout
+// CHECK-NEXT:   DeclRefExpr [[BaseExpr:0x[0-9a-fA-F]+]] {{.*}} 'int' lvalue 
ParmVar {{.*}} 'X' 'int'
+// CHECK-NEXT:   OpaqueValueExpr {{.*}} 'int' lvalue
+// CHECK-NEXT:     DeclRefExpr [[BaseExpr]] {{.*}} 'int' lvalue ParmVar {{.*}} 
'X' 'int'
+
+
+// CHECK-LABEL: FunctionDecl {{.*}} used buzz 'double (int, double)' 
implicit_instantiation
+// CHECK: CallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(inout int)' 
<FunctionToPointerDecay>
+// CHECK-NEXT:   DeclRefExpr {{.*}} 'void (inout int)' lvalue Function {{.*}} 
'fn' 'void (inout int)'
+// CHECK-NEXT: HLSLOutArgExpr {{.*}} 'int' lvalue inout
+// CHECK-NEXT:   DeclRefExpr [[BaseExpr:0x[0-9a-fA-F]+]] {{.*}} 'int' lvalue 
ParmVar {{.*}} 'X' 'int'
+// CHECK-NEXT:   OpaqueValueExpr {{.*}} 'int' lvalue
+// CHECK-NEXT:     DeclRefExpr [[BaseExpr]] {{.*}} 'int' lvalue ParmVar {{.*}} 
'X' 'int'
+
+template <typename T>
+T buzz(int X, T Y) {
+  fn(X);
+  return X + Y;
+}
+
+export void caller() {
+  int X = 2;
+  float Y = 3.3;
+  double Z = 2.2;
+
+  X = wrapper(X);
+  Y = wrapper(Y);
+  Z = wrapper(Z);
+
+  fizz(X);
+  fizz(Y);
+  fizz(Z);
+
+  X = buzz(X, X);
+  Y = buzz(X, Y);
+  Z = buzz(X, Z);
+}
diff --git a/clang/test/SemaHLSL/parameter_modifiers.hlsl 
b/clang/test/SemaHLSL/parameter_modifiers.hlsl
index c728a41b650eed..cf739851e4547a 100644
--- a/clang/test/SemaHLSL/parameter_modifiers.hlsl
+++ b/clang/test/SemaHLSL/parameter_modifiers.hlsl
@@ -48,11 +48,9 @@ void callFns() {
   // Call with literal arguments.
   implicitFn(1); // Ok.
   inFn(1); // Ok.
-  inoutFn(1); // expected-error{{no matching function for call to 'inoutFn'}}
-  // expected-note@#inoutFn{{candidate function not viable: no known 
conversion from 'int' to 'float &' for 1st argument}}
-  outFn(1); // expected-error{{no matching function for call to 'outFn}}
-  // expected-note@#outFn{{candidate function not viable: no known conversion 
from 'int' to 'float &' for 1st argument}}
-  
+  inoutFn(1); // expected-error{{cannot bind non-lvalue argument 1 to inout 
paramemter}}
+  outFn(1); // expected-error{{cannot bind non-lvalue argument 1 to out 
paramemter}}
+
   // Call with variables.
   float f;
   implicitFn(f); // Ok.
diff --git a/clang/test/SemaHLSL/parameter_modifiers_ast.hlsl 
b/clang/test/SemaHLSL/parameter_modifiers_ast.hlsl
index 50b162bdfc26cc..c5246045be259e 100644
--- a/clang/test/SemaHLSL/parameter_modifiers_ast.hlsl
+++ b/clang/test/SemaHLSL/parameter_modifiers_ast.hlsl
@@ -11,25 +11,25 @@ void fn(float f);
 // CHECK-NOT: HLSLParamModifierAttr
 void fn2(in float f);
 
-// CHECK: FunctionDecl {{.*}} fn3 'void (float &)'
+// CHECK: FunctionDecl {{.*}} fn3 'void (out float)'
 // CHECK-NEXT: ParmVarDecl {{.*}} f 'float &'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} out
 // CHECK-NOT: HLSLParamModifierAttr
 void fn3(out float f);
 
-// CHECK: FunctionDecl {{.*}} fn4 'void (float &)'
+// CHECK: FunctionDecl {{.*}} fn4 'void (inout float)'
 // CHECK-NEXT: ParmVarDecl {{.*}} f 'float &'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout
 // CHECK-NOT: HLSLParamModifierAttr
 void fn4(inout float f);
 
-// CHECK: FunctionDecl {{.*}} fn5 'void (float &)'
+// CHECK: FunctionDecl {{.*}} fn5 'void (inout float)'
 // CHECK-NEXT: ParmVarDecl {{.*}} f 'float &'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout MergedSpelling
 // CHECK-NOT: HLSLParamModifierAttr
 void fn5(out in float f);
 
-// CHECK: FunctionDecl {{.*}} fn6 'void (float &)'
+// CHECK: FunctionDecl {{.*}} fn6 'void (inout float)'
 // CHECK-NEXT: ParmVarDecl {{.*}} f 'float &'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout MergedSpelling
 // CHECK-NOT: HLSLParamModifierAttr
@@ -37,10 +37,10 @@ void fn6(in out float f);
 
 // CHECK-NEXT: FunctionTemplateDecl [[Template:0x[0-9a-fA-F]+]] {{.*}} fn7
 // CHECK-NEXT: TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 0 
T
-// CHECK-NEXT: FunctionDecl {{.*}} fn7 'void (T)'
+// CHECK-NEXT: FunctionDecl {{.*}} fn7 'void (inout T)'
 // CHECK-NEXT: ParmVarDecl {{.*}} f 'T'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout
-// CHECK-NEXT: FunctionDecl [[Instantiation:0x[0-9a-fA-F]+]] {{.*}} used fn7 
'void (float &)' implicit_instantiation
+// CHECK-NEXT: FunctionDecl [[Instantiation:0x[0-9a-fA-F]+]] {{.*}} used fn7 
'void (inout float)' implicit_instantiation
 // CHECK-NEXT: TemplateArgument type 'float'
 // CHECK-NEXT:  BuiltinType {{.*}} 'float'
 // CHECK-NEXT: ParmVarDecl {{.*}} f 'float &'
@@ -54,11 +54,11 @@ void fn7(inout T f);
 // CHECK-NEXT: DeclStmt
 // CHECK-NEXT: VarDecl {{.*}} used f 'float'
 // CHECK-NEXT: CallExpr {{.*}} 'void'
-// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float &)' 
<FunctionToPointerDecay>
-// CHECK-NEXT: DeclRefExpr {{.*}} 'void (float &)' lvalue
-// CHECK-SAME: Function [[Instantiation]] 'fn7' 'void (float &)'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(inout float)' 
<FunctionToPointerDecay>
+// CHECK-NEXT: DeclRefExpr {{.*}} 'void (inout float)' lvalue
+// CHECK-SAME: Function [[Instantiation]] 'fn7' 'void (inout float)'
 // CHECK-SAME: (FunctionTemplate [[Template]] 'fn7')
-// CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue Var {{.*}} 'f' 'float'
+// CHECK-NEXT: HLSLOutArgExpr {{.*}}'float' lvalue
 void fn8() {
   float f;
   fn7<float>(f);
diff --git a/clang/tools/libclang/CXCursor.cpp 
b/clang/tools/libclang/CXCursor.cpp
index 782c0c243ef1fb..9e8ebe57b23243 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -336,6 +336,7 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl 
*Parent,
   case Stmt::RecoveryExprClass:
   case Stmt::SYCLUniqueStableNameExprClass:
   case Stmt::EmbedExprClass:
+  case Stmt::HLSLOutArgExprClass:
     K = CXCursor_UnexposedExpr;
     break;
 

>From ac6799f0b05450b3932692d90ac2f9bc1feff5fc Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.biene...@me.com>
Date: Mon, 29 Jul 2024 18:37:48 -0500
Subject: [PATCH 02/10] Fix aggregate copy emission

This fixes up the code generation errors in non-scalar writeback
operations.
---
 clang/lib/CodeGen/CGCall.cpp                  | 20 +++----
 .../BasicFeatures/OutputArguments.hlsl        | 55 +++++++++++++++++++
 2 files changed, 65 insertions(+), 10 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index fd1788893b7ac3..018938eb53621a 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -4152,20 +4152,20 @@ static void emitWriteback(CodeGenFunction &CGF,
          "shouldn't have writeback for provably null argument");
 
   if (CGF.getLangOpts().HLSL) {
-    if (writeback.CastExpr) {
+    if (!isa<OpaqueValueExpr>(writeback.CastExpr)) {
       RValue TmpVal = CGF.EmitAnyExprToTemp(writeback.CastExpr);
       if (TmpVal.isScalar())
         CGF.EmitStoreThroughLValue(TmpVal, srcLV);
-      else
-        CGF.EmitAggregateStore(srcLV.getPointer(CGF),
-                               TmpVal.getAggregateAddress(), false);
-    } else {
-      if (srcLV.isSimple())
-        CGF.EmitAggregateStore(srcLV.getPointer(CGF), writeback.Temporary,
-                               false);
       else {
-        llvm::Value *value = CGF.Builder.CreateLoad(writeback.Temporary);
-        RValue TmpVal = RValue::get(value);
+        llvm::Value *Val = 
CGF.Builder.CreateLoad(TmpVal.getAggregateAddress());
+        CGF.EmitAggregateStore(Val, srcLV.getAddress(), false);
+      }
+    } else {
+      llvm::Value *Val = CGF.Builder.CreateLoad(writeback.Temporary);
+      if (srcLV.isSimple()) {
+        CGF.EmitAggregateStore(Val, srcLV.getAddress(), false);
+      } else {
+        RValue TmpVal = RValue::get(Val);
         CGF.EmitStoreThroughLValue(TmpVal, srcLV);
       }
     }
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index 8ed02bb4bbb76a..b64524a13c9231 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -126,3 +126,58 @@ export int case5() {
   increment(I);
   return I;
 }
+
+// Case 6: Aggregate out parameters.
+struct S {
+  int X;
+  float Y;
+};
+
+void init(out S s) {
+  s.X = 3;
+  s.Y = 4;
+}
+
+// ALL-LABEL: define noundef i32 {{.*}}case6
+
+// CHECK: [[S:%.*]] = alloca %struct.S
+// CHECK: [[Tmp:%.*]] = alloca %struct.S
+// CHECK: call void {{.*}}init{{.*}}(ptr noundef nonnull align 4 
dereferenceable(8) [[Tmp]])
+// CHECK: [[RetVal:%.*]] = load %struct.S, ptr [[Tmp]]
+// CHECK: [[XAddr:%.*]] = getelementptr inbounds %struct.S, ptr [[S]], i32 0, 
i32 0
+// CHECK: [[XVal:%.*]] = extractvalue %struct.S [[RetVal]], 0
+// CHECK: store i32 [[XVal]], ptr [[XAddr]]
+// CHECK: [[YAddr:%.*]] = getelementptr inbounds %struct.S, ptr [[S]], i32 0, 
i32 1
+// CHECK: [[YVal:%.*]] = extractvalue %struct.S [[RetVal]], 1
+// CHECK: store float [[YVal]], ptr [[YAddr]]
+
+// OPT: ret i32 7
+export int case6() {
+  S s;
+  init(s);
+  return s.X + s.Y;
+}
+
+// Case 7: Non-scalars with a cast expression.
+void trunc_vec(inout int3 V) {}
+
+// ALL-LABEL: define noundef <3 x float> {{.*}}case7
+
+// CHECK: [[V:%.*]] = alloca <3 x float>
+// CHECK: [[Tmp:%.*]] = alloca <3 x i32>
+// CHECK: [[FVal:%.*]] = load <3 x float>, ptr [[V]]
+// CHECK: [[IVal:%.*]] = fptosi <3 x float> [[FVal]] to <3 x i32>
+// CHECK: store <3 x i32> [[IVal]], ptr [[Tmp]]
+// CHECK: call void {{.*}}trunc_vec{{.*}}(ptr noundef nonnull align 16 
dereferenceable(16) [[Tmp]])
+// CHECK: [[IRet:%.*]] = load <3 x i32>, ptr [[Tmp]]
+// CHECK: [[FRet:%.*]] = sitofp <3 x i32> [[IRet]] to <3 x float>
+// CHECK: store <3 x float> [[FRet]], ptr [[V]]
+
+// OPT: [[IVal:%.*]] = fptosi <3 x float> {{.*}} to <3 x i32>
+// OPT: [[FVal:%.*]] = sitofp <3 x i32> [[IVal]] to <3 x float>
+// OPT: ret <3 x float> [[FVal]]
+
+export float3 case7(float3 V) {
+  trunc_vec(V);
+  return V;
+}

>From d865dbf0891810ace90ae6c1e82104958d9c6c8f Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.biene...@me.com>
Date: Mon, 29 Jul 2024 20:19:41 -0500
Subject: [PATCH 03/10] Move parameter ABI qualifier mismatch

This change moves the verification for parameter ABI qualifier mismatch
earlier and only merges declarations if the parameter modifiers are
compatible.

There is one case where we do need to merge parameter ABI annotations:
the case of explicit `in` and implicit `in` (no attribute). All other
cases are handled during attribute merging.

This addresses feedback from @efriedma-quic, thank you!
---
 clang/include/clang/AST/ASTContext.h         |  8 -----
 clang/include/clang/Sema/SemaHLSL.h          |  1 +
 clang/lib/AST/ASTContext.cpp                 | 14 ---------
 clang/lib/Sema/SemaDecl.cpp                  | 31 +++++--------------
 clang/lib/Sema/SemaHLSL.cpp                  | 32 ++++++++++++++++++++
 clang/test/SemaHLSL/parameter_modifiers.hlsl |  2 +-
 6 files changed, 41 insertions(+), 47 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index e81c7170a022ce..6d1c8ca8a2f961 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -1377,14 +1377,6 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// in the return type and parameter types.
   bool hasSameFunctionTypeIgnoringPtrSizes(QualType T, QualType U);
 
-  /// Get or construct a function type that is equivalent to the input type
-  /// except that the parameter ABI annotations are stripped.
-  QualType getFunctionTypeWithoutParamABIs(QualType T);
-
-  /// Determine if two function types are the same, ignoring parameter ABI
-  /// annotations.
-  bool hasSameFunctionTypeIgnoringParamABI(QualType T, QualType U);
-
   /// Return the uniqued reference to the type for a complex
   /// number with the specified element type.
   QualType getComplexType(QualType T) const;
diff --git a/clang/include/clang/Sema/SemaHLSL.h 
b/clang/include/clang/Sema/SemaHLSL.h
index 64b565787f3257..8e1965c252e7d9 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -61,6 +61,7 @@ class SemaHLSL : public SemaBase {
   void handleParamModifierAttr(Decl *D, const ParsedAttr &AL);
 
   bool CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+  bool CheckCompatibleParameterABI(FunctionDecl *New, FunctionDecl *Old);
 
   ExprResult ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg);
 };
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 750928fc009280..a465cdfcf3c89e 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -3590,20 +3590,6 @@ bool 
ASTContext::hasSameFunctionTypeIgnoringPtrSizes(QualType T, QualType U) {
                      getFunctionTypeWithoutPtrSizes(U));
 }
 
-QualType ASTContext::getFunctionTypeWithoutParamABIs(QualType T) {
-  if (const auto *Proto = T->getAs<FunctionProtoType>()) {
-    FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo();
-    EPI.ExtParameterInfos = nullptr;
-    return getFunctionType(Proto->getReturnType(), Proto->param_types(), EPI);
-  }
-  return T;
-}
-
-bool ASTContext::hasSameFunctionTypeIgnoringParamABI(QualType T, QualType U) {
-  return hasSameType(T, U) || hasSameType(getFunctionTypeWithoutParamABIs(T),
-                                          getFunctionTypeWithoutParamABIs(U));
-}
-
 void ASTContext::adjustExceptionSpec(
     FunctionDecl *FD, const FunctionProtoType::ExceptionSpecInfo &ESI,
     bool AsWritten) {
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index fa8e79299a77e0..315e8bf4eee649 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -3244,26 +3244,6 @@ static void mergeParamDeclAttributes(ParmVarDecl 
*newDecl,
            diag::note_carries_dependency_missing_first_decl) << 1/*Param*/;
   }
 
-  // HLSL parameter declarations for inout and out must match between
-  // declarations. In HLSL inout and out are ambiguous at the call site, but
-  // have different calling behavior, so you cannot overload a method based on 
a
-  // difference between inout and out annotations.
-  if (S.getLangOpts().HLSL) {
-    const auto *NDAttr = newDecl->getAttr<HLSLParamModifierAttr>();
-    const auto *ODAttr = oldDecl->getAttr<HLSLParamModifierAttr>();
-    // We don't need to cover the case where one declaration doesn't have an
-    // attribute. The only possible case there is if one declaration has an 
`in`
-    // attribute and the other declaration has no attribute. This case is
-    // allowed since parameters are `in` by default.
-    if (NDAttr && ODAttr &&
-        NDAttr->getSpellingListIndex() != ODAttr->getSpellingListIndex()) {
-      S.Diag(newDecl->getLocation(), diag::err_hlsl_param_qualifier_mismatch)
-          << NDAttr << newDecl;
-      S.Diag(oldDecl->getLocation(), diag::note_previous_declaration_as)
-          << ODAttr;
-    }
-  }
-
   if (!oldDecl->hasAttrs())
     return;
 
@@ -4049,6 +4029,13 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, 
NamedDecl *&OldD, Scope *S,
       }
     }
 
+    // HLSL check parameters for matching ABI specifications.
+    if (getLangOpts().HLSL) {
+      if(HLSL().CheckCompatibleParameterABI(New, Old))
+        return true;
+      return MergeCompatibleFunctionDecls(New, Old, S, MergeTypeWithOld);
+    }
+
     // If the function types are compatible, merge the declarations. Ignore the
     // exception specifier because it was already checked above in
     // CheckEquivalentExceptionSpec, and we don't want follow-on diagnostics
@@ -4057,10 +4044,6 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, 
NamedDecl *&OldD, Scope *S,
                                                          NewQType))
       return MergeCompatibleFunctionDecls(New, Old, S, MergeTypeWithOld);
 
-    if (getLangOpts().HLSL && Context.hasSameFunctionTypeIgnoringParamABI(
-                                  OldQTypeForComparison, NewQType))
-      return MergeCompatibleFunctionDecls(New, Old, S, MergeTypeWithOld);
-
     // If the types are imprecise (due to dependent constructs in friends or
     // local extern declarations), it's OK if they differ. We'll check again
     // during instantiation.
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 3118fe577d7747..dcc0eca3b09ef2 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1122,6 +1122,38 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned 
BuiltinID, CallExpr *TheCall) {
   return false;
 }
 
+bool SemaHLSL::CheckCompatibleParameterABI(FunctionDecl *New,
+                                           FunctionDecl *Old) {
+  if (New->getNumParams() != Old->getNumParams())
+    return true;
+
+  bool HadError = false;
+
+  for (unsigned i = 0, e = New->getNumParams(); i != e; ++i) {
+    ParmVarDecl *NewParam = New->getParamDecl(i);
+    ParmVarDecl *OldParam = Old->getParamDecl(i);
+
+    // HLSL parameter declarations for inout and out must match between
+    // declarations. In HLSL inout and out are ambiguous at the call site,
+    // but have different calling behavior, so you cannot overload a
+    // method based on a difference between inout and out annotations.
+    const auto *NDAttr = NewParam->getAttr<HLSLParamModifierAttr>();
+    unsigned NSpellingIdx = (NDAttr ? NDAttr->getSpellingListIndex() : 0);
+    const auto *ODAttr = OldParam->getAttr<HLSLParamModifierAttr>();
+    unsigned OSpellingIdx = (ODAttr ? ODAttr->getSpellingListIndex() : 0);
+
+    if (NSpellingIdx != OSpellingIdx) {
+      SemaRef.Diag(NewParam->getLocation(),
+                   diag::err_hlsl_param_qualifier_mismatch)
+          << NDAttr << NewParam;
+      SemaRef.Diag(OldParam->getLocation(), diag::note_previous_declaration_as)
+          << ODAttr;
+      HadError = true;
+    }
+  }
+  return HadError;
+}
+
 ExprResult SemaHLSL::ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg) {
   assert(Param->hasAttr<HLSLParamModifierAttr>() &&
          "We should not get here without a parameter modifier expression");
diff --git a/clang/test/SemaHLSL/parameter_modifiers.hlsl 
b/clang/test/SemaHLSL/parameter_modifiers.hlsl
index cf739851e4547a..2347c17ce0afdd 100644
--- a/clang/test/SemaHLSL/parameter_modifiers.hlsl
+++ b/clang/test/SemaHLSL/parameter_modifiers.hlsl
@@ -35,7 +35,7 @@ void fn(in float f); // #fn-in
 void failOverloadResolution() {
   float f = 1.0;
   fn(f); // expected-error{{call to 'fn' is ambiguous}}
-  // expected-note@#fn-def{{candidate function}}
+  // expected-note@#fn{{candidate function}}
   // expected-note@#fn-in{{candidate function}}
 }
 

>From 6c8c058d2fad499a907dc357daeca21d5001c9b3 Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.biene...@me.com>
Date: Mon, 29 Jul 2024 20:21:21 -0500
Subject: [PATCH 04/10] Fix clang-format

---
 clang/lib/Sema/SemaDecl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 315e8bf4eee649..48ad2fb9e89d1d 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -4031,7 +4031,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl 
*&OldD, Scope *S,
 
     // HLSL check parameters for matching ABI specifications.
     if (getLangOpts().HLSL) {
-      if(HLSL().CheckCompatibleParameterABI(New, Old))
+      if (HLSL().CheckCompatibleParameterABI(New, Old))
         return true;
       return MergeCompatibleFunctionDecls(New, Old, S, MergeTypeWithOld);
     }

>From 3441e910c1e77c96e152a2eb26a18b2c4f83d30b Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.biene...@me.com>
Date: Tue, 30 Jul 2024 08:49:54 -0500
Subject: [PATCH 05/10] Limit when function declarations are merged

This change makes it so that function declarations are only merged when
no parameter ABI mismatches occur and when the functions have the same
type ignoring the parameter ABIs.

This allows handling cases where implicit and explicit parameter ABIs
are the same, as with the implicit `in` and explicit `in` for HLSL.
---
 clang/include/clang/AST/ASTContext.h |  8 ++++++++
 clang/lib/AST/ASTContext.cpp         | 15 +++++++++++++++
 clang/lib/Sema/SemaDecl.cpp          | 11 ++++++++++-
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index 6d1c8ca8a2f961..6957afde53d9d9 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -1377,6 +1377,14 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// in the return type and parameter types.
   bool hasSameFunctionTypeIgnoringPtrSizes(QualType T, QualType U);
 
+  /// Get or construct a function type that is equivalent to the input type
+  /// except that the parameter ABI annotations are stripped.
+  QualType getFunctionTypeWithoutParamABIs(QualType T) const;
+
+  /// Determine if two function types are the same, ignoring parameter ABI
+  /// annotations.
+  bool hasSameFunctionTypeIgnoringParamABI(QualType T, QualType U) const;
+
   /// Return the uniqued reference to the type for a complex
   /// number with the specified element type.
   QualType getComplexType(QualType T) const;
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index a465cdfcf3c89e..667aa30ef75eb5 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -3590,6 +3590,21 @@ bool 
ASTContext::hasSameFunctionTypeIgnoringPtrSizes(QualType T, QualType U) {
                      getFunctionTypeWithoutPtrSizes(U));
 }
 
+QualType ASTContext::getFunctionTypeWithoutParamABIs(QualType T) const {
+  if (const auto *Proto = T->getAs<FunctionProtoType>()) {
+    FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo();
+    EPI.ExtParameterInfos = nullptr;
+    return getFunctionType(Proto->getReturnType(), Proto->param_types(), EPI);
+  }
+  return T;
+}
+
+bool ASTContext::hasSameFunctionTypeIgnoringParamABI(QualType T,
+                                                     QualType U) const {
+  return hasSameType(T, U) || hasSameType(getFunctionTypeWithoutParamABIs(T),
+                                          getFunctionTypeWithoutParamABIs(U));
+}
+
 void ASTContext::adjustExceptionSpec(
     FunctionDecl *FD, const FunctionProtoType::ExceptionSpecInfo &ESI,
     bool AsWritten) {
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 48ad2fb9e89d1d..74f0b37f991d3c 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -4033,7 +4033,16 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, 
NamedDecl *&OldD, Scope *S,
     if (getLangOpts().HLSL) {
       if (HLSL().CheckCompatibleParameterABI(New, Old))
         return true;
-      return MergeCompatibleFunctionDecls(New, Old, S, MergeTypeWithOld);
+
+      // If no errors are generated when checking parameter ABIs we can check 
if
+      // the two declarations have the same type ignoring the ABIs and if so,
+      // the declarations can be merged. This case for merging is only valid in
+      // HLSL because there are no valid cases of merging mismatched parameter
+      // ABIs except the HLSL implicit in and explicit in.
+      if (Context.hasSameFunctionTypeIgnoringParamABI(OldQTypeForComparison,
+                                                      NewQType))
+        return MergeCompatibleFunctionDecls(New, Old, S, MergeTypeWithOld);
+      // Fall through for conflicting redeclarations and redefinitions.
     }
 
     // If the function types are compatible, merge the declarations. Ignore the

>From 2283f0aedfd9dd038d08f305a1f302554cb020f0 Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.biene...@me.com>
Date: Wed, 31 Jul 2024 09:23:10 -0500
Subject: [PATCH 06/10] Resolve FIXME/TODO notes

One of these was left behind to properly put in `noalias` call argument
attributes; the other was there while I was working out the writeback
logic and is resolved and verified by the tests.
---
 clang/lib/CodeGen/CGCall.cpp                       |  4 ++--
 .../CodeGenHLSL/BasicFeatures/OutputArguments.hlsl | 14 +++++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 018938eb53621a..7c825047c9d37c 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2832,7 +2832,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
     switch (FI.getExtParameterInfo(ArgNo).getABI()) {
     case ParameterABI::HLSLOut:
     case ParameterABI::HLSLInOut:
-      // FIXME: Do this...
+      Attrs.addAttribute(llvm::Attribute::NoAlias);
+      break;
     case ParameterABI::Ordinary:
       break;
 
@@ -4739,7 +4740,6 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, 
const Expr *E,
     llvm::Value *LifetimeSize = EmitLifetimeStart(Sz, BaseAddr);
 
     Address TmpAddr(Addr, ElTy, LV.getAlignment());
-    // TODO-HLSLOutArgExp: Fix me!!!
     args.addWriteback(EmitLValue(OE->getBase()->IgnoreImpCasts()), TmpAddr,
                       nullptr, OE->getWriteback(), LifetimeSize);
     return args.add(RValue::get(TmpAddr, *this), type);
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index b64524a13c9231..be7a17f43e845b 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -13,7 +13,7 @@ void trunc_Param(inout int X) {}
 // CHECK: [[FVal:%.*]] = load float, ptr {{.*}}
 // CHECK: [[IVal:%.*]] = fptosi float [[FVal]] to i32
 // CHECK: store i32 [[IVal]], ptr [[ArgTmp]]
-// CHECK: call void {{.*}}trunc_Param{{.*}}(ptr noundef nonnull align 4 
dereferenceable(4) [[ArgTmp]])
+// CHECK: call void {{.*}}trunc_Param{{.*}}(ptr noalias noundef nonnull align 
4 dereferenceable(4) [[ArgTmp]])
 // CHECK: [[IRet:%.*]] = load i32, ptr [[ArgTmp]]
 // CHECK: [[FRet:%.*]] = sitofp i32 [[IRet]] to float
 // CHECK: store float [[FRet]], ptr [[F]]
@@ -35,7 +35,7 @@ void undef(out int Z) { }
 // CHECK: [[V:%.*]] = alloca i32
 // CHECK: [[ArgTmp:%.*]] = alloca i32
 // CHECK-NOT: store {{.*}}, ptr [[ArgTmp]]
-// CHECK: call void {{.*}}unde{{.*}}(ptr noundef nonnull align 4 
dereferenceable(4) [[ArgTmp]])
+// CHECK: call void {{.*}}unde{{.*}}(ptr noalias noundef nonnull align 4 
dereferenceable(4) [[ArgTmp]])
 // CHECK-NOT: store {{.*}}, ptr [[ArgTmp]]
 // CHECK: [[Res:%.*]] = load i32, ptr [[ArgTmp]]
 // CHECK: store i32 [[Res]], ptr [[V]], align 4
@@ -54,7 +54,7 @@ void zero(out int Z) { Z = 0; }
 // CHECK: [[V:%.*]] = alloca i32
 // CHECK: [[ArgTmp:%.*]] = alloca i32
 // CHECK-NOT: store {{.*}}, ptr [[ArgTmp]]
-// CHECK: call void {{.*}}zero{{.*}}(ptr noundef nonnull align 4 
dereferenceable(4) [[ArgTmp]])
+// CHECK: call void {{.*}}zero{{.*}}(ptr noalias noundef nonnull align 4 
dereferenceable(4) [[ArgTmp]])
 // CHECK-NOT: store {{.*}}, ptr [[ArgTmp]]
 // CHECK: [[Res:%.*]] = load i32, ptr [[ArgTmp]]
 // CHECK: store i32 [[Res]], ptr [[V]], align 4
@@ -90,7 +90,7 @@ void funky(inout int3 X) {
 // CHECK:  store <3 x i32> [[Vyzx]], ptr [[ArgTmp]]
 
 // Call the function with the temporary.
-// CHECK: call void {{.*}}funky{{.*}}(ptr noundef nonnull align 16 
dereferenceable(16) [[ArgTmp]])
+// CHECK: call void {{.*}}funky{{.*}}(ptr noalias noundef nonnull align 16 
dereferenceable(16) [[ArgTmp]])
 
 // Shuffle it back.
 // CHECK:  [[RetVal:%.*]] = load <3 x i32>, ptr [[ArgTmp]]
@@ -117,7 +117,7 @@ void increment(inout int I) {
 // CHECK: store i32 4, ptr [[I]]
 // CHECK: [[IInit:%.*]] = load i32, ptr [[I]]
 // CHECK: store i32 [[IInit:%.*]], ptr [[ArgTmp]], align 4
-// CHECK: call void {{.*}}increment{{.*}}(ptr noundef nonnull align 4 
dereferenceable(4) [[ArgTmp]])
+// CHECK: call void {{.*}}increment{{.*}}(ptr noalias noundef nonnull align 4 
dereferenceable(4) [[ArgTmp]])
 // CHECK: [[RetVal:%.*]] = load i32, ptr [[ArgTmp]]
 // CHECK: store i32 [[RetVal]], ptr [[I]], align 4
 // OPT: ret i32 5
@@ -142,7 +142,7 @@ void init(out S s) {
 
 // CHECK: [[S:%.*]] = alloca %struct.S
 // CHECK: [[Tmp:%.*]] = alloca %struct.S
-// CHECK: call void {{.*}}init{{.*}}(ptr noundef nonnull align 4 
dereferenceable(8) [[Tmp]])
+// CHECK: call void {{.*}}init{{.*}}(ptr noalias noundef nonnull align 4 
dereferenceable(8) [[Tmp]])
 // CHECK: [[RetVal:%.*]] = load %struct.S, ptr [[Tmp]]
 // CHECK: [[XAddr:%.*]] = getelementptr inbounds %struct.S, ptr [[S]], i32 0, 
i32 0
 // CHECK: [[XVal:%.*]] = extractvalue %struct.S [[RetVal]], 0
@@ -168,7 +168,7 @@ void trunc_vec(inout int3 V) {}
 // CHECK: [[FVal:%.*]] = load <3 x float>, ptr [[V]]
 // CHECK: [[IVal:%.*]] = fptosi <3 x float> [[FVal]] to <3 x i32>
 // CHECK: store <3 x i32> [[IVal]], ptr [[Tmp]]
-// CHECK: call void {{.*}}trunc_vec{{.*}}(ptr noundef nonnull align 16 
dereferenceable(16) [[Tmp]])
+// CHECK: call void {{.*}}trunc_vec{{.*}}(ptr noalias noundef nonnull align 16 
dereferenceable(16) [[Tmp]])
 // CHECK: [[IRet:%.*]] = load <3 x i32>, ptr [[Tmp]]
 // CHECK: [[FRet:%.*]] = sitofp <3 x i32> [[IRet]] to <3 x float>
 // CHECK: store <3 x float> [[FRet]], ptr [[V]]

>From 6b66117993c8c7a2296820f53240e518237a99b9 Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.biene...@me.com>
Date: Wed, 31 Jul 2024 13:43:57 -0500
Subject: [PATCH 07/10] One last fix to correctly annotate parameter types

We should be annotating the parameter types as restrict references.
This allows the alias information within a function to correctly
identify non-aliasing parameters.
---
 clang/include/clang/Sema/SemaHLSL.h              |  2 ++
 clang/lib/Sema/SemaHLSL.cpp                      |  6 ++++++
 clang/lib/Sema/SemaTemplateInstantiateDecl.cpp   |  3 ++-
 clang/lib/Sema/SemaType.cpp                      |  3 ++-
 clang/test/AST/HLSL/OutArgExpr.hlsl              |  9 +++++----
 .../BasicFeatures/OutputArguments.hlsl           | 16 +++++++++++++++-
 clang/test/SemaHLSL/Language/TemplateOutArg.hlsl |  6 +++---
 clang/test/SemaHLSL/parameter_modifiers_ast.hlsl | 10 +++++-----
 8 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/clang/include/clang/Sema/SemaHLSL.h 
b/clang/include/clang/Sema/SemaHLSL.h
index 8e1965c252e7d9..6ecba4e32c2660 100644
--- a/clang/include/clang/Sema/SemaHLSL.h
+++ b/clang/include/clang/Sema/SemaHLSL.h
@@ -64,6 +64,8 @@ class SemaHLSL : public SemaBase {
   bool CheckCompatibleParameterABI(FunctionDecl *New, FunctionDecl *Old);
 
   ExprResult ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg);
+
+  QualType getInoutParameterType(QualType Ty);
 };
 
 } // namespace clang
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index dcc0eca3b09ef2..6236dc3c1ae23f 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1211,3 +1211,9 @@ ExprResult SemaHLSL::ActOnOutParamExpr(ParmVarDecl 
*Param, Expr *Arg) {
       OpaqueValueExpr(Param->getBeginLoc(), Ty, VK_LValue, OK_Ordinary, Arg);
   return ExprResult(HLSLOutArgExpr::Create(Ctx, Ty, Arg, IsInOut, OpV, OpV));
 }
+
+QualType SemaHLSL::getInoutParameterType(QualType Ty) {
+  Ty = SemaRef.getASTContext().getLValueReferenceType(Ty);
+  Ty.addRestrict();
+  return Ty;
+}
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp 
b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index a12d2eff1d2c81..161328c2d17ce7 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -28,6 +28,7 @@
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/SemaAMDGPU.h"
 #include "clang/Sema/SemaCUDA.h"
+#include "clang/Sema/SemaHLSL.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/SemaObjC.h"
 #include "clang/Sema/SemaOpenMP.h"
@@ -696,7 +697,7 @@ static void instantiateDependentHLSLParamModifierAttr(
     const HLSLParamModifierAttr *Attr, Decl *New) {
   ParmVarDecl *P = cast<ParmVarDecl>(New);
   P->addAttr(Attr->clone(S.getASTContext()));
-  P->setType(S.getASTContext().getLValueReferenceType(P->getType()));
+  P->setType(S.HLSL().getInoutParameterType(P->getType()));
 }
 
 void Sema::InstantiateAttrsForDecl(
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 699154d49cacb4..8582aba721b150 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -36,6 +36,7 @@
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/SemaCUDA.h"
 #include "clang/Sema/SemaInternal.h"
+#include "clang/Sema/SemaHLSL.h"
 #include "clang/Sema/SemaObjC.h"
 #include "clang/Sema/SemaOpenMP.h"
 #include "clang/Sema/Template.h"
@@ -8491,7 +8492,7 @@ static void HandleHLSLParamModifierAttr(QualType &CurType,
     return;
   if (Attr.getSemanticSpelling() == HLSLParamModifierAttr::Keyword_inout ||
       Attr.getSemanticSpelling() == HLSLParamModifierAttr::Keyword_out)
-    CurType = S.getASTContext().getLValueReferenceType(CurType);
+    CurType = S.HLSL().getInoutParameterType(CurType);
 }
 
 static void processTypeAttrs(TypeProcessingState &state, QualType &type,
diff --git a/clang/test/AST/HLSL/OutArgExpr.hlsl 
b/clang/test/AST/HLSL/OutArgExpr.hlsl
index 8ad77bd77a88b3..4e34de8f85ffbe 100644
--- a/clang/test/AST/HLSL/OutArgExpr.hlsl
+++ b/clang/test/AST/HLSL/OutArgExpr.hlsl
@@ -1,3 +1,4 @@
+// RUN: rm -f %t.pch
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-pch 
-finclude-default-header -o %t.pch %s
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute 
-finclude-default-header -include-pch %t.pch %s -ast-dump | FileCheck 
--check-prefix=AST %s
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute 
-finclude-default-header -include-pch %t.pch %s -ast-print | FileCheck %s
@@ -8,19 +9,19 @@
 
 RWBuffer<float> Buf;
 
-// CHECK: void trunc_Param(inout int &X) {
+// CHECK: void trunc_Param(inout int &__restrict X) {
 
 // AST: FunctionDecl {{.*}} used trunc_Param 'void (inout int)'
-// AST-NEXT: ParmVarDecl {{.*}} X 'int &'
+// AST-NEXT: ParmVarDecl {{.*}} X 'int &__restrict'
 // AST-NEXT: HLSLParamModifierAttr {{.*}} inout
 
 void trunc_Param(inout int X) {}
 
-// CHECK: void zero(out int &Z) {
+// CHECK: void zero(out int &__restrict Z) {
 // CHECK-NEXT: Z = 0;
 
 // AST: FunctionDecl {{.*}} zero 'void (out int)'
-// AST-NEXT: ParmVarDecl {{.*}} used Z 'int &'
+// AST-NEXT: ParmVarDecl {{.*}} used Z 'int &__restrict'
 // AST-NEXT: HLSLParamModifierAttr {{.*}} out
 void zero(out int Z) { Z = 0; }
 
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index be7a17f43e845b..25e67aebf0a241 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -5,6 +5,8 @@
 // In this test case a float value is passed to an inout parameter taking an
 // integer. It is converted to an integer on call and converted back after the
 // function.
+
+// CHECK: define void {{.*}}trunc_Param{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) {{%.*}})
 void trunc_Param(inout int X) {}
 
 // ALL-LABEL: define noundef float {{.*}}case1
@@ -29,6 +31,8 @@ export float case1(float F) {
 // `out` parameters are not pre-initialized by the caller, so they are
 // uninitialized in the function. If they are not initialized before the
 // function returns the value is undefined.
+
+// CHECK: define void {{.*}}undef{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) {{%.*}})
 void undef(out int Z) { }
 
 // ALL-LABEL: define noundef i32 {{.*}}case2
@@ -47,7 +51,10 @@ export int case2() {
 }
 
 // Case 3: Simple initialized `out` parameter.
-// This test should verify that an out parameter value is written to as expected.
+// This test should verify that an out parameter value is written to as
+// expected.
+
+// CHECK: define void {{.*}}zero{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) {{%.*}})
 void zero(out int Z) { Z = 0; }
 
 // ALL-LABEL: define noundef i32 {{.*}}case3
@@ -68,6 +75,8 @@ export int case3() {
 // Case 4: Vector swizzle arguments.
 // Vector swizzles in HLSL produce lvalues, so they can be used as arguments to
 // inout parameters and the swizzle is reversed on writeback.
+
+// CHECK: define void {{.*}}funky{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) {{%.*}})
 void funky(inout int3 X) {
   X.x += 1;
   X.y += 2;
@@ -106,6 +115,8 @@ export int3 case4() {
 
 
 // Case 5: Straightforward inout of a scalar value.
+
+// CHECK: define void {{.*}}increment{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) {{%.*}})
 void increment(inout int I) {
   I += 1;
 }
@@ -133,6 +144,7 @@ struct S {
   float Y;
 };
 
+// CHECK: define void {{.*}}init{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(8) {{%.*}})
 void init(out S s) {
   s.X = 3;
   s.Y = 4;
@@ -159,6 +171,8 @@ export int case6() {
 }
 
 // Case 7: Non-scalars with a cast expression.
+
+// CHECK: define void {{.*}}trunc_vec{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) {{%.*}})
 void trunc_vec(inout int3 V) {}
 
 // ALL-LABEL: define noundef <3 x float> {{.*}}case7
diff --git a/clang/test/SemaHLSL/Language/TemplateOutArg.hlsl b/clang/test/SemaHLSL/Language/TemplateOutArg.hlsl
index 0416806b569522..bec81f16b40fb2 100644
--- a/clang/test/SemaHLSL/Language/TemplateOutArg.hlsl
+++ b/clang/test/SemaHLSL/Language/TemplateOutArg.hlsl
@@ -85,15 +85,15 @@ T wrapper(T V) {
 // Check the 3 instantiations (int, float, & double).
 
 // CHECK-LABEL: FunctionDecl {{.*}} used fizz 'void (inout int)' implicit_instantiation
-// CHECK: ParmVarDecl {{.*}} used V 'int &'
+// CHECK: ParmVarDecl {{.*}} used V 'int &__restrict'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout
 
 // CHECK-LABEL: FunctionDecl {{.*}} used fizz 'void (inout float)' implicit_instantiation
-// CHECK: ParmVarDecl {{.*}} used V 'float &'
+// CHECK: ParmVarDecl {{.*}} used V 'float &__restrict'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout
 
 // CHECK-LABEL: FunctionDecl {{.*}} used fizz 'void (inout double)' implicit_instantiation
-// CHECK: ParmVarDecl {{.*}} used V 'double &'
+// CHECK: ParmVarDecl {{.*}} used V 'double &__restrict'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout
 template <typename T>
 void fizz(inout T V) {
diff --git a/clang/test/SemaHLSL/parameter_modifiers_ast.hlsl b/clang/test/SemaHLSL/parameter_modifiers_ast.hlsl
index c5246045be259e..bf5ade162a01a1 100644
--- a/clang/test/SemaHLSL/parameter_modifiers_ast.hlsl
+++ b/clang/test/SemaHLSL/parameter_modifiers_ast.hlsl
@@ -12,25 +12,25 @@ void fn(float f);
 void fn2(in float f);
 
 // CHECK: FunctionDecl {{.*}} fn3 'void (out float)'
-// CHECK-NEXT: ParmVarDecl {{.*}} f 'float &'
+// CHECK-NEXT: ParmVarDecl {{.*}} f 'float &__restrict'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} out
 // CHECK-NOT: HLSLParamModifierAttr
 void fn3(out float f);
 
 // CHECK: FunctionDecl {{.*}} fn4 'void (inout float)'
-// CHECK-NEXT: ParmVarDecl {{.*}} f 'float &'
+// CHECK-NEXT: ParmVarDecl {{.*}} f 'float &__restrict'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout
 // CHECK-NOT: HLSLParamModifierAttr
 void fn4(inout float f);
 
 // CHECK: FunctionDecl {{.*}} fn5 'void (inout float)'
-// CHECK-NEXT: ParmVarDecl {{.*}} f 'float &'
+// CHECK-NEXT: ParmVarDecl {{.*}} f 'float &__restrict'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout MergedSpelling
 // CHECK-NOT: HLSLParamModifierAttr
 void fn5(out in float f);
 
 // CHECK: FunctionDecl {{.*}} fn6 'void (inout float)'
-// CHECK-NEXT: ParmVarDecl {{.*}} f 'float &'
+// CHECK-NEXT: ParmVarDecl {{.*}} f 'float &__restrict'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout MergedSpelling
 // CHECK-NOT: HLSLParamModifierAttr
 void fn6(in out float f);
@@ -43,7 +43,7 @@ void fn6(in out float f);
 // CHECK-NEXT: FunctionDecl [[Instantiation:0x[0-9a-fA-F]+]] {{.*}} used fn7 'void (inout float)' implicit_instantiation
 // CHECK-NEXT: TemplateArgument type 'float'
 // CHECK-NEXT:  BuiltinType {{.*}} 'float'
-// CHECK-NEXT: ParmVarDecl {{.*}} f 'float &'
+// CHECK-NEXT: ParmVarDecl {{.*}} f 'float &__restrict'
 // CHECK-NEXT: HLSLParamModifierAttr {{.*}} inout
 
 template <typename T>

>From 7acfe268a4ad0fc127a3e4c3b6e0259e8028ab43 Mon Sep 17 00:00:00 2001
From: Chris B <cbiene...@microsoft.com>
Date: Thu, 1 Aug 2024 13:44:02 -0500
Subject: [PATCH 08/10] Update
 clang/test/SemaHLSL/Language/OutputParameters.hlsl

Co-authored-by: Damyan Pepper <damy...@microsoft.com>
---
 clang/test/SemaHLSL/Language/OutputParameters.hlsl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/SemaHLSL/Language/OutputParameters.hlsl b/clang/test/SemaHLSL/Language/OutputParameters.hlsl
index 44358a2fbca24b..6d4d59771f8e47 100644
--- a/clang/test/SemaHLSL/Language/OutputParameters.hlsl
+++ b/clang/test/SemaHLSL/Language/OutputParameters.hlsl
@@ -5,7 +5,7 @@ void InOutVecFn(inout float3) {}
 
 // Case 1: Calling out and inout parameters with types that cannot be
 // back-converted. In HLSL 2021 and earlier this only occurs when passing scalar
-// arguments to vector parameters because scalar->vector converison is implicit,
+// arguments to vector parameters because scalar->vector conversion is implicit,
 // but vector->scalar is not.
 void case1() {
   float f;

>From c094804524e20dfdce0528bd76543ea3fba9428e Mon Sep 17 00:00:00 2001
From: Chris Bieneman <chris.biene...@me.com>
Date: Mon, 5 Aug 2024 13:23:59 -0500
Subject: [PATCH 09/10] Fix include ordering

I swear I know my A, B, C's...
---
 clang/lib/Sema/SemaType.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 8582aba721b150..a1de852aeae63c 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -35,8 +35,8 @@
 #include "clang/Sema/ParsedTemplate.h"
 #include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/SemaCUDA.h"
-#include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/SemaHLSL.h"
+#include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/SemaObjC.h"
 #include "clang/Sema/SemaOpenMP.h"
 #include "clang/Sema/Template.h"

>From ee6b06555619cc32de3ed4a3b5f04e2d1a4434a9 Mon Sep 17 00:00:00 2001
From: Chris B <cbiene...@microsoft.com>
Date: Mon, 19 Aug 2024 16:52:27 -0500
Subject: [PATCH 10/10] Update clang/include/clang/AST/Expr.h

Co-authored-by: John McCall <rjmcc...@gmail.com>
---
 clang/include/clang/AST/Expr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 83eed4827cfe20..65df72701c81aa 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -7063,7 +7063,7 @@ class ArraySectionExpr : public Expr {
 
 /// This class represents temporary values used to represent inout and out
 /// arguments in HLSL. From the callee perspective these parameters are more or
-/// less __restrict__ T&. They are guaranteed to not alias any memory. inout
+/// less `T & restrict`. They are guaranteed to not alias any memory. inout
 /// parameters are initialized by the caller, and out parameters are references
 /// to uninitialized memory.
 ///

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to