================ @@ -1,11 +1,129 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.7-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s -// BUG: https://github.com/llvm/llvm-project/issues/170777 -// XFAIL: * +// CHECK-LABEL: define hidden void @_Z9setMatrixRu11matrix_typeILm4ELm4EfEiDv4_f( +// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) [[M:%.*]], i32 noundef [[INDEX:%.*]], <4 x float> noundef nofpclass(nan inf) [[V:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <4 x float>, align 16 +// CHECK-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 +// CHECK-NEXT: store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4 +// CHECK-NEXT: store <4 x float> [[V]], ptr [[V_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[V_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4 +// CHECK-NEXT: [[MATRIX_LOAD:%.*]] = load <16 x float>, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = add i32 12, [[TMP2]] +// CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x float> [[MATRIX_LOAD]], float [[TMP4]], i32 [[TMP3]] +// CHECK-NEXT: [[TMP6:%.*]] = add i32 8, [[TMP2]] +// CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +// CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x float> [[TMP5]], float [[TMP7]], i32 [[TMP6]] +// CHECK-NEXT: [[TMP9:%.*]] = add i32 4, [[TMP2]] +// CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +// CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP8]], float [[TMP10]], i32 [[TMP9]] +// CHECK-NEXT: [[TMP12:%.*]] = add i32 0, [[TMP2]] +// CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +// CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x float> [[TMP11]], float [[TMP13]], i32 [[TMP12]] +// CHECK-NEXT: store <16 x float> [[TMP14]], ptr [[TMP1]], align 4 ---------------- farzonl wrote:
This is the same behavior DXC is doing. Notice DXC also emits 16 insertelement instructions: https://hlsl.godbolt.org/z/q8bE1Pzdh https://github.com/llvm/llvm-project/pull/173201 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
