xiangzh1 wrote:

> Can you please share the IR before the unroll pass?

Sure:
**with this patch**:

```
4523 ; *** IR Dump After LoopDeletionPass on for.body ***
4524
4525 ; Preheader:
4526 entry:
4527   br label %for.body
4528
4529 ; Loop:
4530 for.body: ; preds = %entry, %for.body7
4531   %Dim.027 = phi i32 [ 0, %entry ], [ %inc16, %for.body7 ]
4532   %cmp1 = icmp eq i32 %Dim.027, %Dims
4533   br i1 %cmp1, label %cleanup, label %if.end
4534
4535 if.end: ; preds = %for.body
4536   %idxprom = zext nneg i32 %Dim.027 to i64
4537   %arrayidx = getelementptr inbounds ptr, ptr %Arr, i64 %idxprom
4538   br label %for.body7
4539
4540 for.body7: ; preds = %if.end
4541   %0 = load ptr, ptr %arrayidx, align 8, !tbaa !3
4542   %1 = load i32, ptr %0, align 4, !tbaa !7
4543   %2 = load i32, ptr %Out, align 4, !tbaa !7
4544   %add14 = add nsw i32 %2, %1
4545   store i32 %add14, ptr %Out, align 4, !tbaa !7
4546   tail call void @_Z3barv() #2
4547   %3 = load ptr, ptr %arrayidx, align 8, !tbaa !3
4548   %arrayidx11.1 = getelementptr inbounds i32, ptr %3, i64 1
4549   %4 = load i32, ptr %arrayidx11.1, align 4, !tbaa !7
4550   %arrayidx13.1 = getelementptr inbounds i32, ptr %Out, i64 1
4551   %5 = load i32, ptr %arrayidx13.1, align 4, !tbaa !7
4552   %add14.1 = add nsw i32 %5, %4
4553   store i32 %add14.1, ptr %arrayidx13.1, align 4, !tbaa !7
4554   tail call void @_Z3barv() #2
4555   %6 = load ptr, ptr %arrayidx, align 8, !tbaa !3
4556   %arrayidx11.2 = getelementptr inbounds i32, ptr %6, i64 2
4557   %7 = load i32, ptr %arrayidx11.2, align 4, !tbaa !7
4558   %arrayidx13.2 = getelementptr inbounds i32, ptr %Out, i64 2
4559   %8 = load i32, ptr %arrayidx13.2, align 4, !tbaa !7
4560   %add14.2 = add nsw i32 %8, %7
4561   store i32 %add14.2, ptr %arrayidx13.2, align 4, !tbaa !7
4562   tail call void @_Z3barv() #2
4563   %9 = load ptr, ptr %arrayidx, align 8, !tbaa !3
4564   %arrayidx11.3 = getelementptr inbounds i32, ptr %9, i64 3
4565   %10 = load i32, ptr %arrayidx11.3, align 4, !tbaa !7
4566   %arrayidx13.3 = getelementptr inbounds i32, ptr %Out, i64 3
4567   %11 = load i32, ptr %arrayidx13.3, align 4, !tbaa !7
4568   %add14.3 = add nsw i32 %11, %10
4569   store i32 %add14.3, ptr %arrayidx13.3, align 4, !tbaa !7
4570   tail call void @_Z3barv() #2
4571   %inc16 = add nuw nsw i32 %Dim.027, 1
4572   %exitcond = icmp ne i32 %inc16, 16
4573   br i1 %exitcond, label %for.body, label %cleanup, !llvm.loop !9
4574
4575 ; Exit blocks
4576 cleanup: ; preds = %for.body, %for.body7
4577   ret void
4578
4579 cleanup: ; preds = %for.body, %for.body7
4580   ret void
4581 ; *** IR Dump After LoopFullUnrollPass on for.body (invalidated) ***
```

**without this patch**:

```
3829 ; *** IR Dump After LoopDeletionPass on if.end ***
3830
3831 ; Preheader:
3832 if.end.preheader: ; preds = %entry
3833   %0 = add i32 %Dims, -1
3834   %umin = call i32 @llvm.umin.i32(i32 %0, i32 15)
3835   %1 = add nuw nsw i32 %umin, 1
3836   %wide.trip.count = zext i32 %1 to i64
3837   br label %if.end
3838
3839 ; Loop:
3840 if.end: ; preds = %if.end.preheader, %for.body7
3841   %indvars.iv = phi i64 [ 0, %if.end.preheader ], [ %indvars.iv.next, %for.body7 ]
3842   %arrayidx = getelementptr inbounds ptr, ptr %Arr, i64 %indvars.iv
3843   br label %for.body7
3844
3845 for.body7: ; preds = %if.end
3846   %2 = load ptr, ptr %arrayidx, align 8, !tbaa !3
3847   %3 = load i32, ptr %2, align 4, !tbaa !7
3848   %4 = load i32, ptr %Out, align 4, !tbaa !7
3849   %add14 = add nsw i32 %4, %3
3850   store i32 %add14, ptr %Out, align 4, !tbaa !7
3851   tail call void @_Z3barv() #3
3852   %5 = load ptr, ptr %arrayidx, align 8, !tbaa !3
3853   %arrayidx11.1 = getelementptr inbounds i32, ptr %5, i64 1
3854   %6 = load i32, ptr %arrayidx11.1, align 4, !tbaa !7
3855   %arrayidx13.1 = getelementptr inbounds i32, ptr %Out, i64 1
3856   %7 = load i32, ptr %arrayidx13.1, align 4, !tbaa !7
3857   %add14.1 = add nsw i32 %7, %6
3858   store i32 %add14.1, ptr %arrayidx13.1, align 4, !tbaa !7
3859   tail call void @_Z3barv() #3
3860   %8 = load ptr, ptr %arrayidx, align 8, !tbaa !3
3861   %arrayidx11.2 = getelementptr inbounds i32, ptr %8, i64 2
3862   %9 = load i32, ptr %arrayidx11.2, align 4, !tbaa !7
3863   %arrayidx13.2 = getelementptr inbounds i32, ptr %Out, i64 2
3864   %10 = load i32, ptr %arrayidx13.2, align 4, !tbaa !7
3865   %add14.2 = add nsw i32 %10, %9
3866   store i32 %add14.2, ptr %arrayidx13.2, align 4, !tbaa !7
3867   tail call void @_Z3barv() #3
3868   %11 = load ptr, ptr %arrayidx, align 8, !tbaa !3
3869   %arrayidx11.3 = getelementptr inbounds i32, ptr %11, i64 3
3870   %12 = load i32, ptr %arrayidx11.3, align 4, !tbaa !7
3871   %arrayidx13.3 = getelementptr inbounds i32, ptr %Out, i64 3
3872   %13 = load i32, ptr %arrayidx13.3, align 4, !tbaa !7
3873   %add14.3 = add nsw i32 %13, %12
3874   store i32 %add14.3, ptr %arrayidx13.3, align 4, !tbaa !7
3875   tail call void @_Z3barv() #3
3876   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
3877   %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
3878   br i1 %exitcond, label %cleanup.loopexit, label %if.end, !llvm.loop !9
3879
3880 ; Exit blocks
3881 cleanup.loopexit: ; preds = %for.body7
3882   br label %cleanup
3883 ; *** IR Dump After LoopFullUnrollPass on if.end ***
```

https://github.com/llvm/llvm-project/pull/74268

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits