llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-codegen

Author: Deric C. (Icohedron)

<details>
<summary>Changes</summary>

Fixes #<!-- -->183127

This PR makes the matrix truncation cast implementation use the new matrix 
flattened index helper functions introduced by #<!-- -->182904 so that it reads 
elements from the source matrix using the default matrix memory layout instead 
of always assuming row-major order.

This PR also fixes a bug where matrix truncation always resulted in a truncated 
matrix whose memory layout is row-major order regardless of the default matrix 
memory layout.

Assisted-by: claude-opus-4.6

---
Full diff: https://github.com/llvm/llvm-project/pull/184280.diff


4 Files Affected:

- (modified) clang/include/clang/AST/TypeBase.h (+25-1) 
- (modified) clang/lib/CodeGen/CGExprScalar.cpp (+9-8) 
- (modified) clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl (+24-12) 
- (modified) clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl (+22-11) 


``````````diff
diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h
index dc4442bfeb795..d913c93fdce92 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -4435,7 +4435,7 @@ class ConstantMatrixType final : public MatrixType {
   /// row-major order flattened index. Otherwise, returns the column-major order
   /// flattened index.
   unsigned getFlattenedIndex(unsigned Row, unsigned Column,
-                             bool IsRowMajor = false) {
+                             bool IsRowMajor = false) const {
     return IsRowMajor ? getRowMajorFlattenedIndex(Row, Column)
                       : getColumnMajorFlattenedIndex(Row, Column);
   }
@@ -4457,6 +4457,30 @@ class ConstantMatrixType final : public MatrixType {
     return Column * NumRows + Row;
   }
 
+  /// Given a row-major flattened index \p Index, return the corresponding
+  /// {row, column} position.
+  std::pair<unsigned, unsigned>
+  getRowMajorRowAndColumn(unsigned Index) const {
+    return {Index / NumColumns, Index % NumColumns};
+  }
+
+  /// Given a column-major flattened index \p Index, return the corresponding
+  /// {row, column} position.
+  std::pair<unsigned, unsigned>
+  getColumnMajorRowAndColumn(unsigned Index) const {
+    return {Index % NumRows, Index / NumRows};
+  }
+
+  /// Given a flattened index \p Index, return the corresponding {row, column}
+  /// position. If \p IsRowMajor is true, interprets \p Index as a row-major
+  /// flattened index. Otherwise, interprets it as a column-major flattened
+  /// index.
+  std::pair<unsigned, unsigned> getRowAndColumn(unsigned Index,
+                                                bool IsRowMajor = false) const {
+    return IsRowMajor ? getRowMajorRowAndColumn(Index)
+                      : getColumnMajorRowAndColumn(Index);
+  }
+
   void Profile(llvm::FoldingSetNodeID &ID) {
     Profile(ID, getElementType(), getNumRows(), getNumColumns(),
             getTypeClass());
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 06eadb6c07507..35e3796a42876 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -3118,14 +3118,15 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
       SmallVector<int> Mask;
       unsigned NumCols = MatTy->getNumColumns();
       unsigned NumRows = MatTy->getNumRows();
-      unsigned ColOffset = NumCols;
-      if (auto *SrcMatTy = E->getType()->getAs<ConstantMatrixType>())
-        ColOffset = SrcMatTy->getNumColumns();
-      for (unsigned R = 0; R < NumRows; R++) {
-        for (unsigned C = 0; C < NumCols; C++) {
-          unsigned I = R * ColOffset + C;
-          Mask.push_back(I);
-        }
+      auto *SrcMatTy = E->getType()->getAs<ConstantMatrixType>();
+      assert(SrcMatTy && "Source type must be a matrix type.");
+      assert(NumRows <= SrcMatTy->getNumRows());
+      assert(NumCols <= SrcMatTy->getNumColumns());
+      bool IsRowMajor = CGF.getLangOpts().getDefaultMatrixMemoryLayout() ==
+                        LangOptions::MatrixMemoryLayout::MatrixRowMajor;
+      for (unsigned I = 0, E = MatTy->getNumElementsFlattened(); I < E; I++) {
+        auto [Row, Col] = MatTy->getRowAndColumn(I, IsRowMajor);
+        Mask.push_back(SrcMatTy->getFlattenedIndex(Row, Col, IsRowMajor));
       }
 
       return Builder.CreateShuffleVector(Mat, Mask, "trunc");
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
index fb32478f2cac9..587ccfe88a627 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixExplicitTruncation.hlsl
@@ -1,14 +1,17 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK
 
 // CHECK-LABEL: define hidden noundef <12 x i32> @_Z10trunc_castu11matrix_typeILm4ELm4EiE(
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
+// ROW-CHECK-NEXT:    [[I34:%.*]] = alloca [3 x <4 x i32>], align 4
+// COL-CHECK-NEXT:    [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
+// ROW-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
+// COL-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 
9, i32 10, i32 12, i32 13, i32 14>
 // CHECK-NEXT:    store <12 x i32> [[TRUNC]], ptr [[I34]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <12 x i32>, ptr [[I34]], align 4
 // CHECK-NEXT:    ret <12 x i32> [[TMP1]]
@@ -22,10 +25,12 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
+// ROW-CHECK-NEXT:    [[I43:%.*]] = alloca [4 x <3 x i32>], align 4
+// COL-CHECK-NEXT:    [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 
9, i32 10, i32 12, i32 13, i32 14>
+// ROW-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 
9, i32 10, i32 12, i32 13, i32 14>
+// COL-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
 // CHECK-NEXT:    store <12 x i32> [[TRUNC]], ptr [[I43]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4
 // CHECK-NEXT:    ret <12 x i32> [[TMP1]]
@@ -56,10 +61,12 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
+// ROW-CHECK-NEXT:    [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// COL-CHECK-NEXT:    [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
+// ROW-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
+// COL-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
 // CHECK-NEXT:    store <6 x i32> [[TRUNC]], ptr [[I32]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
 // CHECK-NEXT:    ret <6 x i32> [[TMP1]]
@@ -73,10 +80,12 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
+// ROW-CHECK-NEXT:    [[I23:%.*]] = alloca [2 x <3 x i32>], align 4
+// COL-CHECK-NEXT:    [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
+// ROW-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
+// COL-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
 // CHECK-NEXT:    store <6 x i32> [[TRUNC]], ptr [[I23]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4
 // CHECK-NEXT:    ret <6 x i32> [[TMP1]]
@@ -107,10 +116,12 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
+// ROW-CHECK-NEXT:    [[I21:%.*]] = alloca [2 x <1 x i32>], align 4
+// COL-CHECK-NEXT:    [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <2 x i32> <i32 0, i32 4>
+// ROW-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <2 x i32> <i32 0, i32 4>
+// COL-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <2 x i32> <i32 0, i32 1>
 // CHECK-NEXT:    store <2 x i32> [[TRUNC]], ptr [[I21]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4
 // CHECK-NEXT:    ret <2 x i32> [[TMP1]]
@@ -144,7 +155,8 @@
 // CHECK-NEXT:    [[I1:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
+// ROW-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
+// COL-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 
9, i32 10, i32 12, i32 13, i32 14>
 // CHECK-NEXT:    [[CAST_MTRUNC:%.*]] = extractelement <12 x i32> [[TRUNC]], 
i32 0
 // CHECK-NEXT:    store i32 [[CAST_MTRUNC]], ptr [[I1]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I1]], align 4
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
index d8738c8948f0f..1a9a953ef8367 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixImplicitTruncation.hlsl
@@ -1,14 +1,17 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -fmatrix-memory-layout=row-major -o - %s | FileCheck %s --check-prefixes=CHECK,ROW-CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -fmatrix-memory-layout=column-major -o - %s | FileCheck %s --check-prefixes=CHECK,COL-CHECK
 
 // CHECK-LABEL: define hidden noundef <12 x i32> @_Z10trunc_castu11matrix_typeILm4ELm4EiE(
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
+// ROW-CHECK-NEXT:    [[I34:%.*]] = alloca [3 x <4 x i32>], align 4
+// COL-CHECK-NEXT:    [[I34:%.*]] = alloca [4 x <3 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
+// ROW-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
+// COL-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 
9, i32 10, i32 12, i32 13, i32 14>
 // CHECK-NEXT:    store <12 x i32> [[TRUNC]], ptr [[I34]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <12 x i32>, ptr [[I34]], align 4
 // CHECK-NEXT:    ret <12 x i32> [[TMP1]]
@@ -22,10 +25,12 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
+// ROW-CHECK-NEXT:    [[I43:%.*]] = alloca [4 x <3 x i32>], align 4
+// COL-CHECK-NEXT:    [[I43:%.*]] = alloca [3 x <4 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 
9, i32 10, i32 12, i32 13, i32 14>
+// ROW-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6, i32 8, i32 
9, i32 10, i32 12, i32 13, i32 14>
+// COL-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 8, i32 9, i32 10, i32 11>
 // CHECK-NEXT:    store <12 x i32> [[TRUNC]], ptr [[I43]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <12 x i32>, ptr [[I43]], align 4
 // CHECK-NEXT:    ret <12 x i32> [[TMP1]]
@@ -56,10 +61,12 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
+// ROW-CHECK-NEXT:    [[I32:%.*]] = alloca [3 x <2 x i32>], align 4
+// COL-CHECK-NEXT:    [[I32:%.*]] = alloca [2 x <3 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
+// ROW-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
+// COL-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
 // CHECK-NEXT:    store <6 x i32> [[TRUNC]], ptr [[I32]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i32>, ptr [[I32]], align 4
 // CHECK-NEXT:    ret <6 x i32> [[TMP1]]
@@ -73,10 +80,12 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
+// ROW-CHECK-NEXT:    [[I23:%.*]] = alloca [2 x <3 x i32>], align 4
+// COL-CHECK-NEXT:    [[I23:%.*]] = alloca [3 x <2 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
+// ROW-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 4, i32 5, i32 6>
+// COL-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <6 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9>
 // CHECK-NEXT:    store <6 x i32> [[TRUNC]], ptr [[I23]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <6 x i32>, ptr [[I23]], align 4
 // CHECK-NEXT:    ret <6 x i32> [[TMP1]]
@@ -107,10 +116,12 @@
 // CHECK-SAME: <16 x i32> noundef [[I44:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[I44_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
-// CHECK-NEXT:    [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
+// ROW-CHECK-NEXT:    [[I21:%.*]] = alloca [2 x <1 x i32>], align 4
+// COL-CHECK-NEXT:    [[I21:%.*]] = alloca [1 x <2 x i32>], align 4
 // CHECK-NEXT:    store <16 x i32> [[I44]], ptr [[I44_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr [[I44_ADDR]], align 4
-// CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <2 x i32> <i32 0, i32 4>
+// ROW-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <2 x i32> <i32 0, i32 4>
+// COL-CHECK-NEXT:    [[TRUNC:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x 
i32> poison, <2 x i32> <i32 0, i32 1>
 // CHECK-NEXT:    store <2 x i32> [[TRUNC]], ptr [[I21]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[I21]], align 4
 // CHECK-NEXT:    ret <2 x i32> [[TMP1]]

``````````

</details>


https://github.com/llvm/llvm-project/pull/184280
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to