[clang] 88788dc - [HLSL][Matrix] Implement dynamic single subscript swizzle (#173201)

via cfe-commits Wed, 07 Jan 2026 08:57:18 -0800

Author: Farzon Lotfi
Date: 2026-01-07T11:57:04-05:00
New Revision: 88788dcd5b999ecd611d4f00deece7c03e280ecc


URL: 
https://github.com/llvm/llvm-project/commit/88788dcd5b999ecd611d4f00deece7c03e280ecc
DIFF: 
https://github.com/llvm/llvm-project/commit/88788dcd5b999ecd611d4f00deece7c03e280ecc.diff

LOG: [HLSL][Matrix] Implement dynamic single subscript swizzle (#173201)

fixes #170777

If we don't use the vector type and instead continue to pass along the
matrix type when we enter `EmitExtVectorElementExpr`, then we don't need
to store the row and column lengths on the LValue.

Using the matrix type means we can reuse the isMatrixRow() cases in
EmitLoadOfLValue and EmitStoreThroughLValue, and we do not have to
support a new LValue kind that is a hybrid between the ExtVectorElt and
MatrixRow cases.

All we need to do to support this is pass the list of column indices as
a `ConstantDataVector` and check the size of this vector to know how
many column iterations we need to do. After that, we just index into the
vector to fetch the right encoded element index value.

Added: 
    

Modified: 
    clang/lib/CodeGen/CGExpr.cpp
    clang/lib/CodeGen/CGValue.h
    
clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl
    clang/test/SemaHLSL/matrix_single_subscript_errors.hlsl

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index ba2cdd3ea19dc..6309c37788f0c 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2471,17 +2471,28 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, 
SourceLocation Loc) {
 
     unsigned NumRows = MT->getNumRows();
     unsigned NumCols = MT->getNumColumns();
-
+    unsigned NumLanes = NumCols;
     llvm::Value *MatrixVec = EmitLoadOfScalar(LV, Loc);
     llvm::Value *Row = LV.getMatrixRowIdx();
     llvm::Type *ElemTy = ConvertType(MT->getElementType());
-    llvm::Type *RowTy = llvm::FixedVectorType::get(ElemTy, 
MT->getNumColumns());
-    llvm::Value *Result = llvm::PoisonValue::get(RowTy); // <NumCols x T>
-
+    llvm::Constant *ColConstsIndices = nullptr;
     llvm::MatrixBuilder MB(Builder);
 
-    for (unsigned Col = 0; Col < NumCols; ++Col) {
-      llvm::Value *ColIdx = llvm::ConstantInt::get(Row->getType(), Col);
+    if (LV.isMatrixRowSwizzle()) {
+      ColConstsIndices = LV.getMatrixRowElts();
+      NumLanes = llvm::cast<llvm::FixedVectorType>(ColConstsIndices->getType())
+                     ->getNumElements();
+    }
+
+    llvm::Type *RowTy = llvm::FixedVectorType::get(ElemTy, NumLanes);
+    llvm::Value *Result = llvm::PoisonValue::get(RowTy); // <NumLanes x T>
+
+    for (unsigned Col = 0; Col < NumLanes; ++Col) {
+      llvm::Value *ColIdx;
+      if (ColConstsIndices)
+        ColIdx = ColConstsIndices->getAggregateElement(Col);
+      else
+        ColIdx = llvm::ConstantInt::get(Row->getType(), Col);
       bool IsMatrixRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
                               LangOptions::MatrixMemoryLayout::MatrixRowMajor;
       llvm::Value *EltIndex =
@@ -2726,6 +2737,7 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, 
LValue Dst,
 
       unsigned NumRows = MT->getNumRows();
       unsigned NumCols = MT->getNumColumns();
+      unsigned NumLanes = NumCols;
 
       llvm::Value *MatrixVec =
           Builder.CreateLoad(Dst.getAddress(), "matrix.load");
@@ -2734,8 +2746,20 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, 
LValue Dst,
       llvm::Value *RowVal = Src.getScalarVal(); // <NumCols x T>
       llvm::MatrixBuilder MB(Builder);
 
-      for (unsigned Col = 0; Col < NumCols; ++Col) {
-        llvm::Value *ColIdx = llvm::ConstantInt::get(Row->getType(), Col);
+      llvm::Constant *ColConstsIndices = nullptr;
+      if (Dst.isMatrixRowSwizzle()) {
+        ColConstsIndices = Dst.getMatrixRowElts();
+        NumLanes =
+            llvm::cast<llvm::FixedVectorType>(ColConstsIndices->getType())
+                ->getNumElements();
+      }
+
+      for (unsigned Col = 0; Col < NumLanes; ++Col) {
+        llvm::Value *ColIdx;
+        if (ColConstsIndices)
+          ColIdx = ColConstsIndices->getAggregateElement(Col);
+        else
+          ColIdx = llvm::ConstantInt::get(Row->getType(), Col);
         bool IsMatrixRowMajor = getLangOpts().getDefaultMatrixMemoryLayout() ==
                                 
LangOptions::MatrixMemoryLayout::MatrixRowMajor;
         llvm::Value *EltIndex =
@@ -4967,11 +4991,9 @@ LValue CodeGenFunction::EmitMatrixSingleSubscriptExpr(
     const MatrixSingleSubscriptExpr *E) {
   LValue Base = EmitLValue(E->getBase());
   llvm::Value *RowIdx = EmitMatrixIndexExpr(E->getRowIdx());
-  const auto *MatTy = E->getBase()->getType()->castAs<ConstantMatrixType>();
   return LValue::MakeMatrixRow(
       MaybeConvertMatrixAddress(Base.getAddress(), *this), RowIdx,
-      MatTy->getNumColumns(), MatTy->getNumRows(), E->getBase()->getType(),
-      Base.getBaseInfo(), TBAAAccessInfo());
+      E->getBase()->getType(), Base.getBaseInfo(), TBAAAccessInfo());
 }
 
 LValue CodeGenFunction::EmitMatrixSubscriptExpr(const MatrixSubscriptExpr *E) {
@@ -5253,8 +5275,10 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
     if (auto *RowIdx =
             llvm::dyn_cast<llvm::ConstantInt>(Base.getMatrixRowIdx())) {
       llvm::SmallVector<llvm::Constant *> MatIndices;
-      unsigned NumCols = Base.getMatrixNumCols();
-      unsigned NumRows = Base.getMatrixNumRows();
+      QualType MatTy = Base.getType();
+      const ConstantMatrixType *MT = MatTy->castAs<ConstantMatrixType>();
+      unsigned NumCols = MT->getNumColumns();
+      unsigned NumRows = MT->getNumRows();
       MatIndices.reserve(NumCols);
 
       unsigned Row = RowIdx->getZExtValue();
@@ -5269,7 +5293,15 @@ EmitExtVectorElementExpr(const ExtVectorElementExpr *E) {
                                       E->getBase()->getType(),
                                       Base.getBaseInfo(), TBAAAccessInfo());
     }
-    return EmitUnsupportedLValue(E, "Matrix single index swizzle");
+    llvm::Constant *Cols =
+        llvm::ConstantDataVector::get(getLLVMContext(), Indices);
+    // Note: intentionally not using E.getType() so we can reuse isMatrixRow()
+    // implementations in EmitLoadOfLValue & EmitStoreThroughLValue and don't
+    // need the LValue to have its own number of rows and columns when the
+    // type is a vector.
+    return LValue::MakeMatrixRowSwizzle(
+        Base.getMatrixAddress(), Base.getMatrixRowIdx(), Cols, Base.getType(),
+        Base.getBaseInfo(), TBAAAccessInfo());
   }
 
   assert(Base.isExtVectorElt() && "Can only subscript lvalue vec elts here!");

diff  --git a/clang/lib/CodeGen/CGValue.h b/clang/lib/CodeGen/CGValue.h
index c3ae130014192..373ea55147404 100644
--- a/clang/lib/CodeGen/CGValue.h
+++ b/clang/lib/CodeGen/CGValue.h
@@ -211,7 +211,7 @@ class LValue {
   };
 
   // Note: Only meaningful when isMatrixRow() and the row is swizzled.
-  unsigned NumCols, NumRows;
+  llvm::Constant *MatrixRowElts = nullptr;
 
   QualType Type;
 
@@ -290,6 +290,9 @@ class LValue {
   bool isGlobalReg() const { return LVType == GlobalReg; }
   bool isMatrixElt() const { return LVType == MatrixElt; }
   bool isMatrixRow() const { return LVType == MatrixRow; }
+  bool isMatrixRowSwizzle() const {
+    return isMatrixRow() && MatrixRowElts != nullptr;
+  }
 
   bool isVolatileQualified() const { return Quals.hasVolatile(); }
   bool isRestrictQualified() const { return Quals.hasRestrict(); }
@@ -411,14 +414,9 @@ class LValue {
     return MatrixRowIdx;
   }
 
-  unsigned getMatrixNumRows() const {
-    assert(isMatrixRow());
-    return NumRows;
-  }
-
-  unsigned getMatrixNumCols() const {
-    assert(isMatrixRow());
-    return NumCols;
+  llvm::Constant *getMatrixRowElts() const {
+    assert(isMatrixRowSwizzle() && "not a matrix row swizzle lvalue");
+    return MatrixRowElts;
   }
 
   // extended vector elements.
@@ -510,18 +508,30 @@ class LValue {
   }
 
   static LValue MakeMatrixRow(Address Addr, llvm::Value *RowIdx,
-                              unsigned NumCols, unsigned NumRows,
                               QualType MatrixTy, LValueBaseInfo BaseInfo,
                               TBAAAccessInfo TBAAInfo) {
     LValue LV;
     LV.LVType = MatrixRow;
     LV.MatrixRowIdx = RowIdx; // store the row index here
-    LV.NumCols = NumCols;
-    LV.NumRows = NumRows;
+    LV.MatrixRowElts = nullptr; // use sequential indexing
     LV.Initialize(MatrixTy, MatrixTy.getQualifiers(), Addr, BaseInfo, 
TBAAInfo);
     return LV;
   }
 
+  static LValue MakeMatrixRowSwizzle(Address MatAddr, llvm::Value *RowIdx,
+                                     llvm::Constant *Cols, QualType MatrixTy,
+                                     LValueBaseInfo BaseInfo,
+                                     TBAAAccessInfo TBAAInfo) {
+    LValue LV;
+    LV.LVType = MatrixRow;
+    LV.Addr = MatAddr;
+    LV.MatrixRowIdx = RowIdx;
+    LV.MatrixRowElts = Cols; // use indices in list order
+    LV.Initialize(MatrixTy, MatrixTy.getQualifiers(), MatAddr, BaseInfo,
+                  TBAAInfo);
+    return LV;
+  }
+
   static LValue MakeMatrixElt(Address matAddress, llvm::Value *Idx,
                               QualType type, LValueBaseInfo BaseInfo,
                               TBAAAccessInfo TBAAInfo) {

diff  --git 
a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl
index 82b7007fcdb3e..bfd6e68af8775 100644
--- 
a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl
+++ 
b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptDynamicSwizzle.hlsl
@@ -1,11 +1,153 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes 
-emit-llvm -finclude-default-header -o - %s | FileCheck %s
-// BUG: https://github.com/llvm/llvm-project/issues/170777
-// XFAIL: *
 
+// CHECK-LABEL: define hidden void 
@_Z9setMatrixRu11matrix_typeILm4ELm4EfEiDv4_f(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) 
[[M:%.*]], i32 noundef [[INDEX:%.*]], <4 x float> noundef nofpclass(nan inf) 
[[V:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    store ptr [[M]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    store <4 x float> [[V]], ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull 
[[META3:![0-9]+]], !align [[META4:![0-9]+]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[MATRIX_LOAD:%.*]] = load <16 x float>, ptr [[TMP1]], align 
4
+// CHECK-NEXT:    [[TMP3:%.*]] = add i32 12, [[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x float> [[MATRIX_LOAD]], 
float [[TMP4]], i32 [[TMP3]]
+// CHECK-NEXT:    [[TMP6:%.*]] = add i32 8, [[TMP2]]
+// CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+// CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x float> [[TMP5]], float 
[[TMP7]], i32 [[TMP6]]
+// CHECK-NEXT:    [[TMP9:%.*]] = add i32 4, [[TMP2]]
+// CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+// CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x float> [[TMP8]], float 
[[TMP10]], i32 [[TMP9]]
+// CHECK-NEXT:    [[TMP12:%.*]] = add i32 0, [[TMP2]]
+// CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+// CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x float> [[TMP11]], float 
[[TMP13]], i32 [[TMP12]]
+// CHECK-NEXT:    store <16 x float> [[TMP14]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    ret void
+//
 void setMatrix(out float4x4 M, int index, float4 V) {
     M[index].abgr = V;
 }
 
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> 
@_Z9getMatrixu11matrix_typeILm4ELm4EfEi(
+// CHECK-SAME: <16 x float> noundef nofpclass(nan inf) [[M:%.*]], i32 noundef 
[[INDEX:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [16 x float], align 4
+// CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store <16 x float> [[M]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = add i32 0, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 
[[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = insertelement <3 x float> poison, float 
[[TMP3]], i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = add i32 4, [[TMP0]]
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <16 x float> [[TMP1]], i32 
[[TMP5]]
+// CHECK-NEXT:    [[TMP7:%.*]] = insertelement <3 x float> [[TMP4]], float 
[[TMP6]], i32 1
+// CHECK-NEXT:    [[TMP8:%.*]] = add i32 8, [[TMP0]]
+// CHECK-NEXT:    [[TMP9:%.*]] = extractelement <16 x float> [[TMP1]], i32 
[[TMP8]]
+// CHECK-NEXT:    [[TMP10:%.*]] = insertelement <3 x float> [[TMP7]], float 
[[TMP9]], i32 2
+// CHECK-NEXT:    ret <3 x float> [[TMP10]]
+//
 float3 getMatrix(float4x4 M, int index) {
     return M[index].rgb;
 }
+
+// CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> 
@_Z9getMatrixu11matrix_typeILm3ELm3EfEi(
+// CHECK-SAME: <9 x float> noundef nofpclass(nan inf) [[M:%.*]], i32 noundef 
[[INDEX:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [9 x float], align 4
+// CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store <9 x float> [[M]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <9 x float>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = add i32 0, [[TMP0]]
+// CHECK-NEXT:    [[TMP3:%.*]] = extractelement <9 x float> [[TMP1]], i32 
[[TMP2]]
+// CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float 
[[TMP3]], i32 0
+// CHECK-NEXT:    [[TMP5:%.*]] = add i32 3, [[TMP0]]
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <9 x float> [[TMP1]], i32 
[[TMP5]]
+// CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP4]], float 
[[TMP6]], i32 1
+// CHECK-NEXT:    [[TMP8:%.*]] = add i32 6, [[TMP0]]
+// CHECK-NEXT:    [[TMP9:%.*]] = extractelement <9 x float> [[TMP1]], i32 
[[TMP8]]
+// CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP7]], float 
[[TMP9]], i32 2
+// CHECK-NEXT:    [[TMP11:%.*]] = add i32 0, [[TMP0]]
+// CHECK-NEXT:    [[TMP12:%.*]] = extractelement <9 x float> [[TMP1]], i32 
[[TMP11]]
+// CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x float> [[TMP10]], float 
[[TMP12]], i32 3
+// CHECK-NEXT:    ret <4 x float> [[TMP13]]
+//
+float4 getMatrix(float3x3 M, int index) {
+    return M[index].rgbr;
+}
+
+// CHECK-LABEL: define hidden noundef <3 x i32> 
@_Z19getMatrixSwizzle2x3Ru11matrix_typeILm2ELm3EiEi(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) 
[[M:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[M]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull 
[[META3]], !align [[META4]]
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load <6 x i32>, ptr [[TMP0]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = add i32 4, [[TMP1]]
+// CHECK-NEXT:    [[TMP4:%.*]] = extractelement <6 x i32> [[TMP2]], i32 
[[TMP3]]
+// CHECK-NEXT:    [[TMP5:%.*]] = insertelement <3 x i32> poison, i32 [[TMP4]], 
i32 0
+// CHECK-NEXT:    [[TMP6:%.*]] = add i32 0, [[TMP1]]
+// CHECK-NEXT:    [[TMP7:%.*]] = extractelement <6 x i32> [[TMP2]], i32 
[[TMP6]]
+// CHECK-NEXT:    [[TMP8:%.*]] = insertelement <3 x i32> [[TMP5]], i32 
[[TMP7]], i32 1
+// CHECK-NEXT:    [[TMP9:%.*]] = add i32 2, [[TMP1]]
+// CHECK-NEXT:    [[TMP10:%.*]] = extractelement <6 x i32> [[TMP2]], i32 
[[TMP9]]
+// CHECK-NEXT:    [[TMP11:%.*]] = insertelement <3 x i32> [[TMP8]], i32 
[[TMP10]], i32 2
+// CHECK-NEXT:    ret <3 x i32> [[TMP11]]
+//
+int3 getMatrixSwizzle2x3(out int2x3 M, int index) {
+    return M[index].brg;
+}
+
+// CHECK-LABEL: define hidden void 
@_Z26setMatrixSwizzleFromMatrixRu11matrix_typeILm2ELm3EiES_i(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(24) 
[[M:%.*]], <6 x i32> noundef [[N:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[N_ADDR:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[M]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store <6 x i32> [[N]], ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i32>, ptr [[N_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = add i32 0, [[TMP0]]
+// CHECK-NEXT:    [[MATRIX_ELEM:%.*]] = extractelement <6 x i32> [[TMP1]], i32 
[[TMP2]]
+// CHECK-NEXT:    [[MATRIX_ROW_INS:%.*]] = insertelement <3 x i32> poison, i32 
[[MATRIX_ELEM]], i32 0
+// CHECK-NEXT:    [[TMP3:%.*]] = add i32 2, [[TMP0]]
+// CHECK-NEXT:    [[MATRIX_ELEM1:%.*]] = extractelement <6 x i32> [[TMP1]], 
i32 [[TMP3]]
+// CHECK-NEXT:    [[MATRIX_ROW_INS2:%.*]] = insertelement <3 x i32> 
[[MATRIX_ROW_INS]], i32 [[MATRIX_ELEM1]], i32 1
+// CHECK-NEXT:    [[TMP4:%.*]] = add i32 4, [[TMP0]]
+// CHECK-NEXT:    [[MATRIX_ELEM3:%.*]] = extractelement <6 x i32> [[TMP1]], 
i32 [[TMP4]]
+// CHECK-NEXT:    [[MATRIX_ROW_INS4:%.*]] = insertelement <3 x i32> 
[[MATRIX_ROW_INS2]], i32 [[MATRIX_ELEM3]], i32 2
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull 
[[META3]], !align [[META4]]
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[MATRIX_LOAD:%.*]] = load <6 x i32>, ptr [[TMP5]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = add i32 4, [[TMP6]]
+// CHECK-NEXT:    [[TMP8:%.*]] = extractelement <3 x i32> [[MATRIX_ROW_INS4]], 
i32 0
+// CHECK-NEXT:    [[TMP9:%.*]] = insertelement <6 x i32> [[MATRIX_LOAD]], i32 
[[TMP8]], i32 [[TMP7]]
+// CHECK-NEXT:    [[TMP10:%.*]] = add i32 0, [[TMP6]]
+// CHECK-NEXT:    [[TMP11:%.*]] = extractelement <3 x i32> 
[[MATRIX_ROW_INS4]], i32 1
+// CHECK-NEXT:    [[TMP12:%.*]] = insertelement <6 x i32> [[TMP9]], i32 
[[TMP11]], i32 [[TMP10]]
+// CHECK-NEXT:    [[TMP13:%.*]] = add i32 2, [[TMP6]]
+// CHECK-NEXT:    [[TMP14:%.*]] = extractelement <3 x i32> 
[[MATRIX_ROW_INS4]], i32 2
+// CHECK-NEXT:    [[TMP15:%.*]] = insertelement <6 x i32> [[TMP12]], i32 
[[TMP14]], i32 [[TMP13]]
+// CHECK-NEXT:    store <6 x i32> [[TMP15]], ptr [[TMP5]], align 4
+// CHECK-NEXT:    ret void
+//
+void setMatrixSwizzleFromMatrix(out int2x3 M, int2x3 N, int index) {
+    M[index].brg = N[index];
+}
+//.
+// CHECK: [[META3]] = !{}
+// CHECK: [[META4]] = !{i64 4}
+//.

diff  --git a/clang/test/SemaHLSL/matrix_single_subscript_errors.hlsl 
b/clang/test/SemaHLSL/matrix_single_subscript_errors.hlsl
index cb611b6555644..256a71eb72167 100644
--- a/clang/test/SemaHLSL/matrix_single_subscript_errors.hlsl
+++ b/clang/test/SemaHLSL/matrix_single_subscript_errors.hlsl
@@ -10,3 +10,7 @@ void bad_index_type(float f) {
 void bad_constant_row_index() {
   gM[2]; // expected-error {{matrix row index is outside the allowed range}}
 }
+
+float4 getMatrix(float3x3 M, int index) {
+    return M[index].rgba; // expected-error {{vector component access exceeds 
type 'vector<float, 3>' (vector of 3 'float' values)}}
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 88788dc - [HLSL][Matrix] Implement dynamic single subscript swizzle (#173201)

Reply via email to