Author: T0b1-iOS Date: 2025-07-17T09:57:32-07:00 New Revision: d35931c49e5b37243ace2b79bec87463772b6c94
URL: https://github.com/llvm/llvm-project/commit/d35931c49e5b37243ace2b79bec87463772b6c94 DIFF: https://github.com/llvm/llvm-project/commit/d35931c49e5b37243ace2b79bec87463772b6c94.diff LOG: [Clang][CodeGen][X86] don't coerce int128 into `{i64,i64}` for SysV-like ABIs (#135230) Currently, clang coerces `(u)int128_t` arguments to two i64 IR parameters when they are passed in registers. This leads to broken debug info for them after applying SROA+InstCombine. SROA generates IR like this ([godbolt](https://godbolt.org/z/YrTa4chfc)): ```llvm define dso_local { i64, i64 } @add(i64 noundef %a.coerce0, i64 noundef %a.coerce1) { entry: %a.sroa.2.0.insert.ext = zext i64 %a.coerce1 to i128 %a.sroa.2.0.insert.shift = shl nuw i128 %a.sroa.2.0.insert.ext, 64 %a.sroa.0.0.insert.ext = zext i64 %a.coerce0 to i128 %a.sroa.0.0.insert.insert = or i128 %a.sroa.2.0.insert.shift, %a.sroa.0.0.insert.ext #dbg_value(i128 %a.sroa.0.0.insert.insert, !17, !DIExpression(), !18) // ... !17 = !DILocalVariable(name: "a", arg: 1, scope: !10, file: !11, line: 1, type: !14) // ... ``` and InstCombine then removes both the `or` (folding it into the `DIExpression`) and the `shl`, at which point the debug info salvaging in `Transforms/Local` replaces the arguments with `poison`, because it does not allow constants larger than 64 bits in `DIExpression`s. I'm working under the assumption that there is interest in fixing this. If not, please tell me. By not coercing `int128_t`s into `{i64, i64}` but keeping them as `i128`, the debug info stays intact and SelectionDAG then generates two `DW_OP_LLVM_fragment` expressions for the two corresponding argument registers. Given that the ABI code for x64 seems to not coerce the argument when it is passed on the stack, it should not lead to any problems keeping it as an `i128` when it is passed in registers. 
Alternatively, this could be fixed by checking if a constant value fits in 64 bits in the debug info salvaging code and then extending the value on the expression stack to the necessary width. This fixes InstCombine breaking the debug info but then SelectionDAG removes the expression and that seems significantly more complex to debug. Another fix may be to generate `DW_OP_LLVM_fragment` expressions when removing the `or` as it gets marked as disjoint by InstCombine. However, I don't know if the KnownBits information is still available at the time the `or` gets removed and it would probably require refactoring of the debug info salvaging code as that currently only seems to replace single expressions and is not designed to support generating new debug records. Converting `(u)int128_t` arguments to `i128` in the IR seems like the simpler solution, if it doesn't cause any ABI issues. Added: clang/test/CodeGen/X86/i128-debuginfo.c Modified: clang/lib/CodeGen/Targets/X86.cpp clang/test/CodeGen/X86/x86_64-arguments.c clang/test/CodeGen/alloc-align-attr.c clang/test/CodeGen/builtins.c clang/test/CodeGen/ext-int-cc.c clang/test/CodeGen/extend-arg-64.c Removed: ################################################################################ diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 0b712ac2dabc4..abb91486e7ee6 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -2470,13 +2470,12 @@ GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset, return llvm::Type::getDoubleTy(getVMContext()); } - /// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in -/// an 8-byte GPR. This means that we either have a scalar or we are talking -/// about the high or low part of an up-to-16-byte struct. This routine picks -/// the best LLVM IR type to represent this, which may be i64 or may be anything -/// else that the backend will pass in a GPR that works better (e.g. 
i8, %foo*, -/// etc). +/// one or more 8-byte GPRs. This means that we either have a scalar or we are +/// talking about the high and/or low part of an up-to-16-byte struct. This +/// routine picks the best LLVM IR type to represent this, which may be i64 or +/// may be anything else that the backend will pass in GPRs that works better +/// (e.g. i8, %foo*, etc). /// /// PrefType is an LLVM IR type that corresponds to (part of) the IR type for /// the source type. IROffset is an offset in bytes into the LLVM IR type that @@ -2534,6 +2533,13 @@ GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset, SourceOffset); } + // if we have a 128-bit integer, we can pass it safely using an i128 + // so we return that + if (IRType->isIntegerTy(128)) { + assert(IROffset == 0); + return IRType; + } + // Okay, we don't have any better idea of what to pass, so we pass this in an // integer register that isn't too big to fit the rest of the struct. unsigned TySizeInBytes = @@ -2591,8 +2597,7 @@ GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi, return Result; } -ABIArgInfo X86_64ABIInfo:: -classifyReturnType(QualType RetTy) const { +ABIArgInfo X86_64ABIInfo::classifyReturnType(QualType RetTy) const { // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the // classification algorithm. X86_64ABIInfo::Class Lo, Hi; @@ -2638,6 +2643,12 @@ classifyReturnType(QualType RetTy) const { isPromotableIntegerTypeForABI(RetTy)) return ABIArgInfo::getExtend(RetTy); } + + if (ResType->isIntegerTy(128)) { + // i128 are passed directly + assert(Hi == Integer); + return ABIArgInfo::getDirect(ResType); + } break; // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next @@ -2783,6 +2794,11 @@ X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs, return ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty)); } + if (ResType->isIntegerTy(128)) { + assert(Hi == Integer); + ++neededInt; + return ABIArgInfo::getDirect(ResType); + } break; // AMD64-ABI 3.2.3p3: Rule 3. 
If the class is SSE, the next diff --git a/clang/test/CodeGen/X86/i128-debuginfo.c b/clang/test/CodeGen/X86/i128-debuginfo.c new file mode 100644 index 0000000000000..4b865c1bed9f0 --- /dev/null +++ b/clang/test/CodeGen/X86/i128-debuginfo.c @@ -0,0 +1,10 @@ +// no autogeneration since update_cc_test_checks does not support -g +// RUN: %clang_cc1 -triple x86_64-pc-linux -O1 -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s + +// CHECK-LABEL: define{{.*}} i128 @add(i128 noundef %a) +// CHECK: #dbg_value(i128 %a, ![[DI:.*]], !DIExpression() +__int128_t add(__int128_t a) { + return a + a; +} + +// CHECK: ![[DI]] = !DILocalVariable(name: "a", arg: 1 diff --git a/clang/test/CodeGen/X86/x86_64-arguments.c b/clang/test/CodeGen/X86/x86_64-arguments.c index 82845f0a2b31f..580f9487395d3 100644 --- a/clang/test/CodeGen/X86/x86_64-arguments.c +++ b/clang/test/CodeGen/X86/x86_64-arguments.c @@ -551,6 +551,45 @@ struct s68 { void f68(struct s68 x) { } +// CHECK-LABEL: define{{.*}} i128 @f69(i128 noundef %a) +__int128_t f69(__int128_t a) { + return a; +} + +// CHECK-LABEL: define{{.*}} i128 @f70(i128 noundef %a) +__uint128_t f70(__uint128_t a) { + return a; +} + +// check that registers are correctly counted for (u)int128_t arguments +struct s71 { + long long a, b; +}; +// CHECK-LABEL: define{{.*}} void @f71(i128 noundef %a, i128 noundef %b, i64 noundef %c, ptr noundef byval(%struct.s71) align 8 %d) +void f71(__int128_t a, __int128_t b, long long c, struct s71 d) { +} +// CHECK-LABEL: define{{.*}} void @f72(i128 noundef %a, i128 noundef %b, i64 %d.coerce0, i64 %d.coerce1) +void f72(__int128_t a, __int128_t b, struct s71 d) { +} + +// check that structs containing (u)int128_t are passed correctly +struct s73 { + struct inner { + __uint128_t a; + }; + struct inner in; +}; +// CHECK-LABEL: define{{.*}} i128 @f73(i128 %a.coerce) +struct s73 f73(struct s73 a) { + return a; +} + +// check that _BitInt(128) is still passed correctly on the stack +// CHECK-LABEL: define{{.*}} 
i128 @f74(i128 noundef %b, i128 noundef %c, i128 noundef %d, i64 noundef %e, ptr noundef byval(i128) align 8 %0) +_BitInt(128) f74(__uint128_t b, __uint128_t c, __uint128_t d, long e, _BitInt(128) a) { + return a; +} + /// The synthesized __va_list_tag does not have file/line fields. // CHECK: = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "__va_list_tag", // CHECK-NOT: file: diff --git a/clang/test/CodeGen/alloc-align-attr.c b/clang/test/CodeGen/alloc-align-attr.c index 76e5d1041b19f..c4c4e76eaaa04 100644 --- a/clang/test/CodeGen/alloc-align-attr.c +++ b/clang/test/CodeGen/alloc-align-attr.c @@ -70,66 +70,42 @@ __INT32_TYPE__ test4(__SIZE_TYPE__ a) { struct Empty {}; struct MultiArgs { __INT64_TYPE__ a, b;}; -// Struct parameter doesn't take up an IR parameter, 'i' takes up 2. +// Struct parameter doesn't take up an IR parameter, 'i' takes up 1. // Truncation to i64 is permissible, since alignments of greater than 2^64 are insane. __INT32_TYPE__ *m3(struct Empty s, __int128_t i) __attribute__((alloc_align(2))); // CHECK-LABEL: @test5( // CHECK-NEXT: entry: -// CHECK-NEXT: [[A:%.*]] = alloca i128, align 16 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i128, align 16 // CHECK-NEXT: [[E:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1 -// CHECK-NEXT: [[COERCE:%.*]] = alloca i128, align 16 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[A]], i32 0, i32 0 -// CHECK-NEXT: store i64 [[A_COERCE0:%.*]], ptr [[TMP0]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[A]], i32 0, i32 1 -// CHECK-NEXT: store i64 [[A_COERCE1:%.*]], ptr [[TMP1]], align 8 -// CHECK-NEXT: [[A1:%.*]] = load i128, ptr [[A]], align 16 -// CHECK-NEXT: store i128 [[A1]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[A_ADDR]], align 16 -// CHECK-NEXT: store i128 [[TMP2]], ptr [[COERCE]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[COERCE]], i32 0, i32 0 -// 
CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 16 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[COERCE]], i32 0, i32 1 -// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 -// CHECK-NEXT: [[CALL:%.*]] = call ptr @m3(i64 noundef [[TMP4]], i64 noundef [[TMP6]]) -// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP2]] to i64 +// CHECK-NEXT: store i128 [[A:%.*]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[CALL:%.*]] = call ptr @m3(i128 noundef [[TMP0]]) +// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP0]] to i64 // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ] -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[CALL]], align 4 -// CHECK-NEXT: ret i32 [[TMP7]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4 +// CHECK-NEXT: ret i32 [[TMP1]] // __INT32_TYPE__ test5(__int128_t a) { struct Empty e; return *m3(e, a); } -// Struct parameter takes up 2 parameters, 'i' takes up 2. +// Struct parameter takes up 2 parameters, 'i' takes up 1. 
__INT32_TYPE__ *m4(struct MultiArgs s, __int128_t i) __attribute__((alloc_align(2))); // CHECK-LABEL: @test6( // CHECK-NEXT: entry: -// CHECK-NEXT: [[A:%.*]] = alloca i128, align 16 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i128, align 16 // CHECK-NEXT: [[E:%.*]] = alloca [[STRUCT_MULTIARGS:%.*]], align 8 -// CHECK-NEXT: [[COERCE:%.*]] = alloca i128, align 16 -// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[A]], i32 0, i32 0 -// CHECK-NEXT: store i64 [[A_COERCE0:%.*]], ptr [[TMP0]], align 16 -// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[A]], i32 0, i32 1 -// CHECK-NEXT: store i64 [[A_COERCE1:%.*]], ptr [[TMP1]], align 8 -// CHECK-NEXT: [[A1:%.*]] = load i128, ptr [[A]], align 16 -// CHECK-NEXT: store i128 [[A1]], ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = load i128, ptr [[A_ADDR]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 0 +// CHECK-NEXT: store i128 [[A:%.*]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load i128, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 1 // CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[E]], i32 0, i32 1 -// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 -// CHECK-NEXT: store i128 [[TMP2]], ptr [[COERCE]], align 16 -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[COERCE]], i32 0, i32 0 -// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 16 -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw { i64, i64 }, ptr [[COERCE]], i32 0, i32 1 -// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 -// CHECK-NEXT: [[CALL:%.*]] = 
call ptr @m4(i64 [[TMP4]], i64 [[TMP6]], i64 noundef [[TMP8]], i64 noundef [[TMP10]]) -// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP2]] to i64 +// CHECK-NEXT: [[CALL:%.*]] = call ptr @m4(i64 [[TMP2]], i64 [[TMP4]], i128 noundef [[TMP0]]) +// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP0]] to i64 // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[CALL]], i64 [[CASTED_ALIGN]]) ] -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[CALL]], align 4 -// CHECK-NEXT: ret i32 [[TMP11]] +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[CALL]], align 4 +// CHECK-NEXT: ret i32 [[TMP5]] // __INT32_TYPE__ test6(__int128_t a) { struct MultiArgs e; diff --git a/clang/test/CodeGen/builtins.c b/clang/test/CodeGen/builtins.c index eda6c67fdad00..aa9965b815983 100644 --- a/clang/test/CodeGen/builtins.c +++ b/clang/test/CodeGen/builtins.c @@ -956,36 +956,24 @@ void test_builtin_os_log_errno(void) { void test_builtin_os_log_long_double(void *buf, long double ld) { // CHECK: %[[BUF_ADDR:.*]] = alloca ptr, align 8 // CHECK: %[[LD_ADDR:.*]] = alloca x86_fp80, align 16 - // CHECK: %[[COERCE:.*]] = alloca i128, align 16 // CHECK: store ptr %[[BUF]], ptr %[[BUF_ADDR]], align 8 // CHECK: store x86_fp80 %[[LD]], ptr %[[LD_ADDR]], align 16 // CHECK: %[[V0:.*]] = load ptr, ptr %[[BUF_ADDR]], align 8 // CHECK: %[[V1:.*]] = load x86_fp80, ptr %[[LD_ADDR]], align 16 // CHECK: %[[V2:.*]] = bitcast x86_fp80 %[[V1]] to i80 // CHECK: %[[V3:.*]] = zext i80 %[[V2]] to i128 - // CHECK: store i128 %[[V3]], ptr %[[COERCE]], align 16 - // CHECK: %[[V5:.*]] = getelementptr inbounds nuw { i64, i64 }, ptr %[[COERCE]], i32 0, i32 0 - // CHECK: %[[V6:.*]] = load i64, ptr %[[V5]], align 16 - // CHECK: %[[V7:.*]] = getelementptr inbounds nuw { i64, i64 }, ptr %[[COERCE]], i32 0, i32 1 - // CHECK: %[[V8:.*]] = load i64, ptr %[[V7]], align 8 - // CHECK: call void @__os_log_helper_1_0_1_16_0(ptr noundef %[[V0]], i64 noundef %[[V6]], i64 noundef %[[V8]]) + // CHECK: call void 
@__os_log_helper_1_0_1_16_0(ptr noundef %[[V0]], i128 noundef %[[V3]]) __builtin_os_log_format(buf, "%Lf", ld); } // CHECK-LABEL: define linkonce_odr hidden void @__os_log_helper_1_0_1_16_0 -// CHECK: (ptr noundef %[[BUFFER:.*]], i64 noundef %[[ARG0_COERCE0:.*]], i64 noundef %[[ARG0_COERCE1:.*]]) +// CHECK: (ptr noundef %[[BUFFER:.*]], i128 noundef %[[ARG0:.*]]) -// CHECK: %[[ARG0:.*]] = alloca i128, align 16 // CHECK: %[[BUFFER_ADDR:.*]] = alloca ptr, align 8 // CHECK: %[[ARG0_ADDR:.*]] = alloca i128, align 16 -// CHECK: %[[V1:.*]] = getelementptr inbounds nuw { i64, i64 }, ptr %[[ARG0]], i32 0, i32 0 -// CHECK: store i64 %[[ARG0_COERCE0]], ptr %[[V1]], align 16 -// CHECK: %[[V2:.*]] = getelementptr inbounds nuw { i64, i64 }, ptr %[[ARG0]], i32 0, i32 1 -// CHECK: store i64 %[[ARG0_COERCE1]], ptr %[[V2]], align 8 -// CHECK: %[[ARG01:.*]] = load i128, ptr %[[ARG0]], align 16 // CHECK: store ptr %[[BUFFER]], ptr %[[BUFFER_ADDR]], align 8 -// CHECK: store i128 %[[ARG01]], ptr %[[ARG0_ADDR]], align 16 +// CHECK: store i128 %[[ARG0]], ptr %[[ARG0_ADDR]], align 16 // CHECK: %[[BUF:.*]] = load ptr, ptr %[[BUFFER_ADDR]], align 8 // CHECK: %[[SUMMARY:.*]] = getelementptr i8, ptr %[[BUF]], i64 0 // CHECK: store i8 0, ptr %[[SUMMARY]], align 1 diff --git a/clang/test/CodeGen/ext-int-cc.c b/clang/test/CodeGen/ext-int-cc.c index f31a4eb240c25..fdca4012ee4a4 100644 --- a/clang/test/CodeGen/ext-int-cc.c +++ b/clang/test/CodeGen/ext-int-cc.c @@ -32,7 +32,7 @@ // Make sure 128 and 64 bit versions are passed like integers. 
void ParamPassing(_BitInt(128) b, _BitInt(64) c) {} -// LIN64: define{{.*}} void @ParamPassing(i64 %{{.+}}, i64 %{{.+}}, i64 %{{.+}}) +// LIN64: define{{.*}} void @ParamPassing(i128 %{{.+}}, i64 %{{.+}}) // WIN64: define dso_local void @ParamPassing(ptr %{{.+}}, i64 %{{.+}}) // LIN32: define{{.*}} void @ParamPassing(ptr %{{.+}}, i64 %{{.+}}) // WIN32: define dso_local void @ParamPassing(ptr %{{.+}}, i64 %{{.+}}) @@ -251,7 +251,7 @@ _BitInt(127) ReturnPassing3(void) { return 0; } // LA32: define{{.*}} void @ReturnPassing3(ptr dead_on_unwind noalias writable sret _BitInt(128) ReturnPassing4(void) { return 0; } -// LIN64: define{{.*}} { i64, i64 } @ReturnPassing4( +// LIN64: define{{.*}} i128 @ReturnPassing4( // WIN64: define dso_local void @ReturnPassing4(ptr dead_on_unwind noalias writable sret // LIN32: define{{.*}} void @ReturnPassing4(ptr dead_on_unwind noalias writable sret // WIN32: define dso_local void @ReturnPassing4(ptr dead_on_unwind noalias writable sret diff --git a/clang/test/CodeGen/extend-arg-64.c b/clang/test/CodeGen/extend-arg-64.c index 2cb56d35af21d..8b99c01807ecc 100644 --- a/clang/test/CodeGen/extend-arg-64.c +++ b/clang/test/CodeGen/extend-arg-64.c @@ -84,7 +84,7 @@ int test(void) { #ifdef D128 knr(i128); // CHECKEXT: load i128 - // CHECKEXT: call{{.*}} void (i64, i64, ...) @knr + // CHECKEXT: call{{.*}} void (i128, ...) @knr #endif knr(u32, s32, u16, s16, u8, s8); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits