https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/192977
>From b0c58f23b2efe774a79e4b2e76766b357d9d63f7 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <[email protected]> Date: Mon, 20 Apr 2026 13:22:10 +0000 Subject: [PATCH 1/2] [Clang] Fix incorrect type for `__mfp8` in `extractelement` codegen The codegen for extracting an element from an FP8 vector was emitting a simple `extractelement` with `i8` type for the extracted element. However, the `__mfp8` type is represented as `<1 x i8>` in LLVM IR. This created an inconsistency in Clang: some `__mfp8` expressions would correspond to LLVM IR values of type `<1 x i8>` and others to values of type `i8`. It also caused an assertion failure when the extracted element was passed as a function argument. This patch fixes the issue by bitcasting the extracted element to `<1 x i8>`. --- clang/lib/CodeGen/CGExprScalar.cpp | 9 ++++++- clang/test/CodeGen/AArch64/fp8-extract.c | 33 ++++++++++++++++++++++++ clang/test/CodeGen/arm-mfp8.c | 12 +++------ 3 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/fp8-extract.c diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index a8dcf22992983..f8997f0503491 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2213,7 +2213,14 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) { if (CGF.SanOpts.has(SanitizerKind::ArrayBounds)) CGF.EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, /*Accessed*/true); - return Builder.CreateExtractElement(Base, Idx, "vecext"); + Value *Ret = Builder.CreateExtractElement(Base, Idx, "vecext"); + + // Even being a scalar the `__mfp8` type corresponds to `<1 x i8>` in LLVM IR. + // Cast the extracted element to the vector type to keep it consistent in + // Clang. 
+ if (E->getType()->isMFloat8Type()) + Ret = Builder.CreateBitCast(Ret, ConvertType(E->getType()), "mfp8ext"); + return Ret; } Value *ScalarExprEmitter::VisitMatrixSingleSubscriptExpr( diff --git a/clang/test/CodeGen/AArch64/fp8-extract.c b/clang/test/CodeGen/AArch64/fp8-extract.c new file mode 100644 index 0000000000000..c0c113600de63 --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-extract.c @@ -0,0 +1,33 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -S -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#include <arm_neon.h> + +// Test for a n internal compiler error when extracting an element from an FP8 +// vector and passing it to a function. 
+ +// CHECK-LABEL: define dso_local void @f( +// CHECK-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 0 +// CHECK-NEXT: [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8> +// CHECK-NEXT: call void @g(<1 x i8> [[MFP8EXT]]) +// CHECK-NEXT: ret void +// +// CHECK-CXX-LABEL: define dso_local void @_Z1f14__Mfloat8x16_t( +// CHECK-CXX-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 0 +// CHECK-CXX-NEXT: [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8> +// CHECK-CXX-NEXT: call void @_Z1gu6__mfp8(<1 x i8> [[MFP8EXT]]) +// CHECK-CXX-NEXT: ret void +// +void f(__Mfloat8x16_t v) { + void g(__mfp8); + g(v[0]); +} diff --git a/clang/test/CodeGen/arm-mfp8.c b/clang/test/CodeGen/arm-mfp8.c index 9385b537f18b3..f99c865e99a95 100644 --- a/clang/test/CodeGen/arm-mfp8.c +++ b/clang/test/CodeGen/arm-mfp8.c @@ -64,20 +64,16 @@ __mfp8 func1n(__mfp8 mfp8) { // CHECK-C-LABEL: define dso_local <1 x i8> @test_extract_element( // CHECK-C-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] { // CHECK-C-NEXT: [[ENTRY:.*:]] -// CHECK-C-NEXT: [[RETVAL:%.*]] = alloca <1 x i8>, align 1 // CHECK-C-NEXT: [[VECEXT:%.*]] = extractelement <16 x i8> [[X]], i32 [[I]] -// CHECK-C-NEXT: store i8 [[VECEXT]], ptr [[RETVAL]], align 1 -// CHECK-C-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1 -// CHECK-C-NEXT: ret <1 x i8> [[TMP0]] +// CHECK-C-NEXT: [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8> +// CHECK-C-NEXT: ret <1 x i8> [[MFP8EXT]] // // CHECK-CXX-LABEL: define dso_local <1 x i8> @_Z20test_extract_element14__Mfloat8x16_ti( // CHECK-CXX-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[RETVAL:%.*]] = alloca <1 x i8>, align 1 // CHECK-CXX-NEXT: [[VECEXT:%.*]] = extractelement <16 x i8> [[X]], i32 [[I]] -// 
CHECK-CXX-NEXT: store i8 [[VECEXT]], ptr [[RETVAL]], align 1 -// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[RETVAL]], align 1 -// CHECK-CXX-NEXT: ret <1 x i8> [[TMP0]] +// CHECK-CXX-NEXT: [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8> +// CHECK-CXX-NEXT: ret <1 x i8> [[MFP8EXT]] // mfloat8_t test_extract_element(mfloat8x16_t x, int i) { return x[i]; >From 5fe924e355eeae522efb8490f4ccd3eca7b2378c Mon Sep 17 00:00:00 2001 From: Momchil Velikov <[email protected]> Date: Fri, 8 May 2026 13:12:20 +0100 Subject: [PATCH 2/2] [fixup] Use `insertelement` instead of `bitcast` --- clang/lib/CodeGen/CGExprScalar.cpp | 7 ++-- clang/test/CodeGen/AArch64/fp8-extract.c | 47 +++++++++++++++++------- clang/test/CodeGen/arm-mfp8.c | 8 ++-- 3 files changed, 41 insertions(+), 21 deletions(-) diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index f8997f0503491..38cec47f676dc 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2216,10 +2216,11 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) { Value *Ret = Builder.CreateExtractElement(Base, Idx, "vecext"); // Even being a scalar the `__mfp8` type corresponds to `<1 x i8>` in LLVM IR. - // Cast the extracted element to the vector type to keep it consistent in - // Clang. 
if (E->getType()->isMFloat8Type()) - Ret = Builder.CreateBitCast(Ret, ConvertType(E->getType()), "mfp8ext"); + Ret = Builder.CreateInsertElement( + llvm::PoisonValue::get(llvm::FixedVectorType::get(CGF.Int8Ty, 1)), Ret, + uint64_t(0), "mfp8ext"); + return Ret; } diff --git a/clang/test/CodeGen/AArch64/fp8-extract.c b/clang/test/CodeGen/AArch64/fp8-extract.c index c0c113600de63..7fa93c6f15305 100644 --- a/clang/test/CodeGen/AArch64/fp8-extract.c +++ b/clang/test/CodeGen/AArch64/fp8-extract.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine | FileCheck %s -check-prefix CHECK-CXX // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -S -o /dev/null %s @@ -11,23 +11,42 @@ // Test for a n internal compiler error when extracting an element from an FP8 // vector and passing it to a function. 
-// CHECK-LABEL: define dso_local void @f( -// CHECK-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-LABEL: define dso_local void @test_var( +// CHECK-SAME: <16 x i8> [[V:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 0 -// CHECK-NEXT: [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8> -// CHECK-NEXT: call void @g(<1 x i8> [[MFP8EXT]]) +// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 [[I]] +// CHECK-NEXT: [[MFP8CAST:%.*]] = insertelement <1 x i8> poison, i8 [[VECEXT]], i64 0 +// CHECK-NEXT: call void @g(<1 x i8> [[MFP8CAST]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void // -// CHECK-CXX-LABEL: define dso_local void @_Z1f14__Mfloat8x16_t( -// CHECK-CXX-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CXX-LABEL: define dso_local void @_Z8test_var14__Mfloat8x16_ti( +// CHECK-CXX-SAME: <16 x i8> [[V:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] -// CHECK-CXX-NEXT: [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 0 -// CHECK-CXX-NEXT: [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8> -// CHECK-CXX-NEXT: call void @_Z1gu6__mfp8(<1 x i8> [[MFP8EXT]]) +// CHECK-CXX-NEXT: [[VECEXT:%.*]] = extractelement <16 x i8> [[V]], i32 [[I]] +// CHECK-CXX-NEXT: [[MFP8CAST:%.*]] = insertelement <1 x i8> poison, i8 [[VECEXT]], i64 0 +// CHECK-CXX-NEXT: call void @_Z1gu6__mfp8(<1 x i8> [[MFP8CAST]]) #[[ATTR2:[0-9]+]] // CHECK-CXX-NEXT: ret void // -void f(__Mfloat8x16_t v) { +void test_var(__Mfloat8x16_t v, int i) { void g(__mfp8); - g(v[0]); + g(v[i]); +} + +// CHECK-LABEL: define dso_local void @test_cst( +// CHECK-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MFP8CAST:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <1 x i32> <i32 3> +// CHECK-NEXT: call void @g(<1 x i8> [[MFP8CAST]]) #[[ATTR2]] +// CHECK-NEXT: ret void +// +// CHECK-CXX-LABEL: define dso_local 
void @_Z8test_cst14__Mfloat8x16_t( +// CHECK-CXX-SAME: <16 x i8> [[V:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: [[MFP8CAST:%.*]] = shufflevector <16 x i8> [[V]], <16 x i8> poison, <1 x i32> <i32 3> +// CHECK-CXX-NEXT: call void @_Z1gu6__mfp8(<1 x i8> [[MFP8CAST]]) #[[ATTR2]] +// CHECK-CXX-NEXT: ret void +// +void test_cst(__Mfloat8x16_t v) { + void g(__mfp8); + g(v[3]); } diff --git a/clang/test/CodeGen/arm-mfp8.c b/clang/test/CodeGen/arm-mfp8.c index f99c865e99a95..82df375a45686 100644 --- a/clang/test/CodeGen/arm-mfp8.c +++ b/clang/test/CodeGen/arm-mfp8.c @@ -65,15 +65,15 @@ __mfp8 func1n(__mfp8 mfp8) { // CHECK-C-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] { // CHECK-C-NEXT: [[ENTRY:.*:]] // CHECK-C-NEXT: [[VECEXT:%.*]] = extractelement <16 x i8> [[X]], i32 [[I]] -// CHECK-C-NEXT: [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8> -// CHECK-C-NEXT: ret <1 x i8> [[MFP8EXT]] +// CHECK-C-NEXT: [[MFP8CAST:%.*]] = insertelement <1 x i8> poison, i8 [[VECEXT]], i64 0 +// CHECK-C-NEXT: ret <1 x i8> [[MFP8CAST]] // // CHECK-CXX-LABEL: define dso_local <1 x i8> @_Z20test_extract_element14__Mfloat8x16_ti( // CHECK-CXX-SAME: <16 x i8> [[X:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0]] { // CHECK-CXX-NEXT: [[ENTRY:.*:]] // CHECK-CXX-NEXT: [[VECEXT:%.*]] = extractelement <16 x i8> [[X]], i32 [[I]] -// CHECK-CXX-NEXT: [[MFP8EXT:%.*]] = bitcast i8 [[VECEXT]] to <1 x i8> -// CHECK-CXX-NEXT: ret <1 x i8> [[MFP8EXT]] +// CHECK-CXX-NEXT: [[MFP8CAST:%.*]] = insertelement <1 x i8> poison, i8 [[VECEXT]], i64 0 +// CHECK-CXX-NEXT: ret <1 x i8> [[MFP8CAST]] // mfloat8_t test_extract_element(mfloat8x16_t x, int i) { return x[i]; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
