Issue 182605
Summary complex-deinterleaving incorrectly rewrites floating point adds as integer adds
Labels new issue
Assignees
Reporter vtjnash
    This was llvm-reduce'd from an isel crash. For any CPU on which the target reports complex deinterleaving as supported, the complex-deinterleaving pass appears to rewrite floating-point vector reductions as integer vector add reductions.
```
opt -passes=complex-deinterleaving -debug -mcpu=apple-m3 output-cint.ll
```
```llvm
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-darwin25.2.0"

define [2 x float] @julia_dot_931() {
top:
  unreachable

vec.epilog.ph:                                    ; No predecessors!
  br label %vec.epilog.vector.body

vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph
  %vec.phi298 = phi <4 x float> [ zeroinitializer, %vec.epilog.ph ], [ %3, %vec.epilog.vector.body ]
 %vec.phi299 = phi <4 x float> [ zeroinitializer, %vec.epilog.ph ], [ %2, %vec.epilog.vector.body ]
  %strided.vec301 = shufflevector <8 x float> zeroinitializer, <8 x float> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %strided.vec302 = shufflevector <8 x float> zeroinitializer, <8 x float> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %0 = fmul <4 x float> %strided.vec301, zeroinitializer
  %1 = fmul <4 x float> %strided.vec302, zeroinitializer
  %2 = fadd <4 x float> %vec.phi299, %0
 %3 = fadd <4 x float> %vec.phi298, %1
  br i1 false, label %vec.epilog.middle.block, label %vec.epilog.vector.body

vec.epilog.middle.block: ; preds = %vec.epilog.vector.body
  %4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> %3)
  %5 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> %2)
  br label %L288.postloop

L288.postloop: ; preds = %L288.postloop, %vec.epilog.middle.block
  br label %L288.postloop
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) #0

; uselistorder directives
uselistorder ptr @llvm.vector.reduce.fadd.v4f32, { 1, 0 }

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
```

After the pass, each `llvm.vector.reduce.fadd.v4f32` call has been rewritten as `llvm.vector.reduce.add.v8f32` (note the missing `f`: `reduce.add` is the integer reduction, here applied to a float vector):
```
vec.epilog.vector.body:                           ; preds = %vec.epilog.vector.body, %vec.epilog.ph
  %0 = phi <8 x float> [ zeroinitializer, %vec.epilog.ph ], [ %0, %vec.epilog.vector.body ]
  %1 = phi <8 x float> [ zeroinitializer, %vec.epilog.ph ], [ %1, %vec.epilog.vector.body ]
  br i1 false, label %vec.epilog.middle.block, label %vec.epilog.vector.body

vec.epilog.middle.block: ; preds = %vec.epilog.vector.body
  %2 = call float @llvm.vector.reduce.add.v8f32(<8 x float> %1)
  %3 = call float @llvm.vector.reduce.add.v8f32(<8 x float> %0)
  br label %L288.postloop
```

To reproduce, the CPU selected with `-mcpu` must make the following predicate return true:
```
bool AArch64TargetLowering::isComplexDeinterleavingSupported() const {
  return Subtarget->hasSVE() || Subtarget->hasSVE2() ||
 Subtarget->hasComplxNum();
}
```

Originally reported as https://github.com/JuliaLang/julia/issues/61092
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to