https://github.com/akadutta updated https://github.com/llvm/llvm-project/pull/176414
>From 02783dd2e1679984611cc62c588e4b8e25b91223 Mon Sep 17 00:00:00 2001 From: akadutta <[email protected]> Date: Fri, 16 Jan 2026 09:14:20 -0600 Subject: [PATCH 1/2] Preserve multi-dimensional array structure in GEP optimization --- .../InstCombine/InstructionCombining.cpp | 96 +++++++++++-------- .../InstCombine/canonicalize-gep-constglob.ll | 4 +- llvm/test/Transforms/InstCombine/strcmp-3.ll | 6 +- llvm/test/Transforms/InstCombine/strlen-8.ll | 10 +- 4 files changed, 67 insertions(+), 49 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index d24db3de8f7b3..0daf76227418b 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2921,12 +2921,21 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP, Indices.append(GEP.op_begin() + 2, GEP.op_end()); // Don't create GEPs with more than one non-zero index. - unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) { - auto *C = dyn_cast<Constant>(Idx); - return !C || !C->isNullValue(); - }); - if (NumNonZeroIndices > 1) - return nullptr; + // Exception: For AMDGPU, preserve multi-dimensional array structure for + // better backend optimization (memory coalescing, vectorization). Check if + // the source element type is a multi-dimensional array. + Type *GEPSrcElemTy = GEP.getSourceElementType(); + bool IsMultiDimArray_Strip = GEPSrcElemTy->isArrayTy() && + GEPSrcElemTy->getArrayElementType()->isArrayTy(); + + if (!IsMultiDimArray_Strip) { + unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) { + auto *C = dyn_cast<Constant>(Idx); + return !C || !C->isNullValue(); + }); + if (NumNonZeroIndices > 1) + return nullptr; + } return replaceInstUsesWith( GEP, Builder.CreateGEP( @@ -3364,17 +3373,24 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { drop_end(Indices), "", GEP.getNoWrapFlags())); } - // Strip leading zero indices. - auto *FirstIdx = dyn_cast<Constant>(Indices.front()); - if (FirstIdx && FirstIdx->isNullValue() && - !FirstIdx->getType()->isVectorTy()) { - gep_type_iterator GTI = gep_type_begin(GEP); - ++GTI; - if (!GTI.isStruct()) - return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(), - GEP.getPointerOperand(), - drop_begin(Indices), "", - GEP.getNoWrapFlags())); + // Strip leading zero indices (except for multi-dimensional arrays). + // Preserve structure for better backend optimization. + Type *GEPSrcElemTy = GEP.getSourceElementType(); + bool IsMultiDimArray_Strip = GEPSrcElemTy->isArrayTy() && + GEPSrcElemTy->getArrayElementType()->isArrayTy(); + + if (!IsMultiDimArray_Strip) { + auto *FirstIdx = dyn_cast<Constant>(Indices.front()); + if (FirstIdx && FirstIdx->isNullValue() && + !FirstIdx->getType()->isVectorTy()) { + gep_type_iterator GTI = gep_type_begin(GEP); + ++GTI; + if (!GTI.isStruct()) + return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(), + GEP.getPointerOperand(), + drop_begin(Indices), "", + GEP.getNoWrapFlags())); + } } // Scalarize vector operands; prefer splat-of-gep.as canonical form. @@ -3403,29 +3419,33 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) { return replaceInstUsesWith(GEP, Res); } - bool SeenNonZeroIndex = false; - for (auto [IdxNum, Idx] : enumerate(Indices)) { - auto *C = dyn_cast<Constant>(Idx); - if (C && C->isNullValue()) - continue; + // GEP has multiple non-zero indices: Split it (except for multi-dim arrays). + // Preserve structure for better backend optimization. + if (!IsMultiDimArray_Strip) { + bool SeenNonZeroIndex = false; + for (auto [IdxNum, Idx] : enumerate(Indices)) { + auto *C = dyn_cast<Constant>(Idx); + if (C && C->isNullValue()) + continue; - if (!SeenNonZeroIndex) { - SeenNonZeroIndex = true; - continue; - } + if (!SeenNonZeroIndex) { + SeenNonZeroIndex = true; + continue; + } - // GEP has multiple non-zero indices: Split it. - ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum); - Value *FrontGEP = - Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices, - GEP.getName() + ".split", GEP.getNoWrapFlags()); - - SmallVector<Value *> BackIndices; - BackIndices.push_back(Constant::getNullValue(NewScalarIndexTy)); - append_range(BackIndices, drop_begin(Indices, IdxNum)); - return GetElementPtrInst::Create( - GetElementPtrInst::getIndexedType(GEPEltType, FrontIndices), FrontGEP, - BackIndices, GEP.getNoWrapFlags()); + // GEP has multiple non-zero indices: Split it. + ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum); + Value *FrontGEP = + Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices, + GEP.getName() + ".split", GEP.getNoWrapFlags()); + + SmallVector<Value *> BackIndices; + BackIndices.push_back(Constant::getNullValue(NewScalarIndexTy)); + append_range(BackIndices, drop_begin(Indices, IdxNum)); + return GetElementPtrInst::Create( + GetElementPtrInst::getIndexedType(GEPEltType, FrontIndices), FrontGEP, + BackIndices, GEP.getNoWrapFlags()); + } } // Check to see if the inputs to the PHI node are getelementptr instructions. diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll index 129da3f9110ad..6d238ae497d07 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll @@ -35,9 +35,7 @@ define ptr @xzy(i64 %x, i64 %y, i64 %z) { ; CHECK-LABEL: define ptr @xzy( ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[GEP_SPLIT:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr getelementptr inbounds nuw (i8, ptr @glob, i64 40), i64 [[X]] -; CHECK-NEXT: [[GEP_SPLIT1:%.*]] = getelementptr inbounds [10 x i32], ptr [[GEP_SPLIT]], i64 [[Z]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[GEP_SPLIT1]], i64 [[Y]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds nuw (i8, ptr @glob, i64 40), i64 0, i64 [[X]], i64 [[Z]], i64 [[Y]] ; CHECK-NEXT: ret ptr [[GEP]] ; entry: diff --git a/llvm/test/Transforms/InstCombine/strcmp-3.ll b/llvm/test/Transforms/InstCombine/strcmp-3.ll index 72da736a0a9fd..2c4012b96e188 100644 --- a/llvm/test/Transforms/InstCombine/strcmp-3.ll +++ b/llvm/test/Transforms/InstCombine/strcmp-3.ll @@ -25,7 +25,7 @@ define i32 @fold_strcmp_a5i0_a5i1_to_0() { define i32 @call_strcmp_a5i0_a5iI(i64 %I) { ; CHECK-LABEL: @call_strcmp_a5i0_a5iI( -; CHECK-NEXT: [[Q:%.*]] = getelementptr [4 x i8], ptr @a5, i64 [[I:%.*]] +; CHECK-NEXT: [[Q:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5, i64 0, i64 [[I:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = call i32 @strcmp(ptr noundef nonnull dereferenceable(4) @a5, ptr noundef nonnull dereferenceable(1) [[Q]]) ; CHECK-NEXT: ret i32 [[CMP]] ; @@ -40,7 +40,7 @@ define i32 @call_strcmp_a5i0_a5iI(i64 %I) { define i32 @call_strcmp_a5iI_a5i0(i64 %I) { ; CHECK-LABEL: @call_strcmp_a5iI_a5i0( -; CHECK-NEXT: [[P:%.*]] = getelementptr [4 x i8], ptr @a5, i64 [[I:%.*]] +; CHECK-NEXT: [[P:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5, i64 0, i64 [[I:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = call i32 @strcmp(ptr noundef nonnull dereferenceable(1) [[P]], ptr noundef nonnull dereferenceable(4) @a5) ; CHECK-NEXT: ret i32 [[CMP]] ; @@ -68,7 +68,7 @@ define i32 @fold_strcmp_a5i0_a5i1_p1_to_0() { define i32 @call_strcmp_a5i0_a5i1_pI(i64 %I) { ; CHECK-LABEL: @call_strcmp_a5i0_a5i1_pI( -; CHECK-NEXT: [[Q:%.*]] = getelementptr i8, ptr getelementptr inbounds nuw (i8, ptr @a5, i64 4), i64 [[I:%.*]] +; CHECK-NEXT: [[Q:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5, i64 0, i64 1, i64 [[I:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = call i32 @strcmp(ptr noundef nonnull dereferenceable(4) @a5, ptr noundef nonnull dereferenceable(1) [[Q]]) ; CHECK-NEXT: ret i32 [[CMP]] ; diff --git a/llvm/test/Transforms/InstCombine/strlen-8.ll b/llvm/test/Transforms/InstCombine/strlen-8.ll index af12198069803..b4334ddd8f1ac 100644 --- a/llvm/test/Transforms/InstCombine/strlen-8.ll +++ b/llvm/test/Transforms/InstCombine/strlen-8.ll @@ -16,7 +16,7 @@ declare i64 @strlen(ptr) define i64 @fold_a5_4_i0_pI(i64 %I) { ; CHECK-LABEL: @fold_a5_4_i0_pI( -; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr @a5_4, i64 [[I:%.*]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, i64 0, i64 [[I:%.*]] ; CHECK-NEXT: [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[PTR]]) ; CHECK-NEXT: ret i64 [[LEN]] ; @@ -30,7 +30,7 @@ define i64 @fold_a5_4_i0_pI(i64 %I) { define i64 @fold_a5_4_i1_pI(i64 %I) { ; CHECK-LABEL: @fold_a5_4_i1_pI( -; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds nuw (i8, ptr @a5_4, i64 4), i64 [[I:%.*]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, i64 1, i64 [[I:%.*]] ; CHECK-NEXT: [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[PTR]]) ; CHECK-NEXT: ret i64 [[LEN]] ; @@ -44,7 +44,7 @@ define i64 @fold_a5_4_i1_pI(i64 %I) { define i64 @fold_a5_4_i2_pI(i64 %I) { ; CHECK-LABEL: @fold_a5_4_i2_pI( -; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds nuw (i8, ptr @a5_4, i64 8), i64 [[I:%.*]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, i64 2, i64 [[I:%.*]] ; CHECK-NEXT: [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[PTR]]) ; CHECK-NEXT: ret i64 [[LEN]] ; @@ -58,7 +58,7 @@ define i64 @fold_a5_4_i2_pI(i64 %I) { define i64 @fold_a5_4_i3_pI_to_0(i64 %I) { ; CHECK-LABEL: @fold_a5_4_i3_pI_to_0( -; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds nuw (i8, ptr @a5_4, i64 12), i64 [[I:%.*]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, i64 3, i64 [[I:%.*]] ; CHECK-NEXT: [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[PTR]]) ; CHECK-NEXT: ret i64 [[LEN]] ; @@ -72,7 +72,7 @@ define i64 @fold_a5_4_i3_pI_to_0(i64 %I) { define i64 @fold_a5_4_i4_pI_to_0(i64 %I) { ; CHECK-LABEL: @fold_a5_4_i4_pI_to_0( -; CHECK-NEXT: [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds nuw (i8, ptr @a5_4, i64 16), i64 [[I:%.*]] +; CHECK-NEXT: [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, i64 4, i64 [[I:%.*]] ; CHECK-NEXT: [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[PTR]]) ; CHECK-NEXT: ret i64 [[LEN]] ; >From fe8abc6c745ab61d14d08ff22df1d16d9aa1fbfc Mon Sep 17 00:00:00 2001 From: akadutta <[email protected]> Date: Fri, 16 Jan 2026 13:01:04 -0600 Subject: [PATCH 2/2] update clang test --- clang/test/CodeGen/union-tbaa1.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/test/CodeGen/union-tbaa1.c b/clang/test/CodeGen/union-tbaa1.c index 3f6ada5023f27..c512c011e588f 100644 --- a/clang/test/CodeGen/union-tbaa1.c +++ b/clang/test/CodeGen/union-tbaa1.c @@ -11,13 +11,13 @@ void bar(vect32 p[][2]); // CHECK-LABEL: define dso_local void @fred( // CHECK-SAME: i32 noundef [[NUM:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[VEC:%.*]], ptr noundef readonly captures(none) [[INDEX:%.*]], ptr noundef readonly captures(none) [[ARR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP:%.*]] = alloca [4 x [2 x %union.vect32]], align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca [4 x [2 x [[UNION_VECT32:%.*]]]], align 8 // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[TMP]]) #[[ATTR3:[0-9]+]] // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[INDEX]], align 4, !tbaa [[TBAA2:![0-9]+]] // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARR]], i32 [[TMP0]] // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA2]] // CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP1]], [[NUM]] -// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP0]] +// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x [2 x [[UNION_VECT32]]]], ptr [[TMP]], i32 0, i32 [[TMP0]] // CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 8, !tbaa [[TBAA6:![0-9]+]] // CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i32 4 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !tbaa [[TBAA2]] @@ -27,7 +27,7 @@ void bar(vect32 p[][2]); // CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[MUL]], 16 // CHECK-NEXT: store i32 [[TMP3]], ptr [[VEC]], align 4, !tbaa [[TBAA2]] // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INDEX]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2 x %union.vect32], ptr [[TMP]], i32 [[TMP4]] +// CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [4 x [2 x [[UNION_VECT32]]]], ptr [[TMP]], i32 0, i32 [[TMP4]] // CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX13]], i32 6 // CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2, !tbaa [[TBAA6]] // CHECK-NEXT: [[CONV16:%.*]] = zext i16 [[TMP5]] to i32 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
