Issue 178538
Summary [SLP] llc timeouts on x86_64 on Android or with -mcpu x86-64-v2
Labels new issue
Assignees
Reporter google-yfyang
    Reproducer:

repro.ll
```
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"

; Reduced reproducer: returns the vector <%0[1], 1.0, 1.0, 1.0>.
define <4 x float> @_ZNK9blah20foo12barEPS0_(<4 x float> %0) {
entry:
  ; Mask indices 0-3 select from the constant operand and 4-7 from %0, so
  ; lane 0 = %0[1] and lane 2 = the constant 1.0; lanes 1 and 3 are poison.
  %1 = shufflevector <4 x float> <float poison, float poison, float 1.000000e+00, float poison>, <4 x float> %0, <4 x i32> <i32 5, i32 poison, i32 2, i32 poison>
  ; Fill the two remaining poison lanes (1 and 3) with 1.0.
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i64 1
  %3 = insertelement <4 x float> %2, float 1.000000e+00, i64 3
  ret <4 x float> %3
}
```

Run `path/to/llc repro.ll -o /dev/null -mcpu x86-64-v2`

This was initially found on Android_x86_64 with a more complicated reproducer (below), but I can only get that one to hang when compiling with clang for an Android target.

Command to run:
`clang repro.ll -o /dev/null -c -O2 -target x86_64-linux-android24`

```
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-android24"
; Original (unreduced) reproducer.
; Reads nine floats from %this (byte offsets 0 through 32), combines them into
; the scalar %sub49, and returns whether %sub49 compares unordered-or-not-equal
; to 0.0. When it does, two <4 x float> vectors and one float, each divided by
; %sub49, are stored to %result at byte offsets 0, 16 and 32.
; NOTE(review): presumably a 3x3 matrix inverse (adjugate / determinant) --
; confirm against the original C++ source.
; NOTE(review): attribute group #3 is referenced below but is not defined in
; this snippet -- confirm whether it must be added or dropped to parse.
define noundef zeroext i1 @_ZNK9bar0foo12blahEPS0_(ptr noundef nonnull readonly align 4 captures(none) dereferenceable(36) %this, ptr noundef writeonly captures(none) %result) local_unnamed_addr #3 align 2 {
entry:
  ; Addresses of the float elements of %this at byte offsets 4 through 32.
  %arrayidx3 = getelementptr inbounds nuw i8, ptr %this, i64 16
  %arrayidx5 = getelementptr inbounds nuw i8, ptr %this, i64 32
  %arrayidx8 = getelementptr inbounds nuw i8, ptr %this, i64 4
  %arrayidx10 = getelementptr inbounds nuw i8, ptr %this, i64 20
  %arrayidx13 = getelementptr inbounds nuw i8, ptr %this, i64 24
 %arrayidx16 = getelementptr inbounds nuw i8, ptr %this, i64 8
  %arrayidx18 = getelementptr inbounds nuw i8, ptr %this, i64 12
  %arrayidx21 = getelementptr inbounds nuw i8, ptr %this, i64 28
  ; Element loads. Offsets 24/28 and 28/32 are read as overlapping <2 x float>
  ; pairs (%2 and %8); offset 32 is additionally read as a scalar (%9).
  %0 = load float, ptr %arrayidx3, align 4
  %1 = load float, ptr %arrayidx10, align 4
  %2 = load <2 x float>, ptr %arrayidx13, align 4
  %3 = extractelement <2 x float> %2, i64 0
  %4 = load float, ptr %arrayidx16, align 4
  %5 = load float, ptr %this, align 4
  %6 = load float, ptr %arrayidx8, align 4
  %7 = load float, ptr %arrayidx18, align 4
  %mul = fmul float %5, %0
  %8 = load <2 x float>, ptr %arrayidx21, align 4
  %9 = load float, ptr %arrayidx5, align 4
  ; Scalar chain: three triple products added, three subtracted. With the
  ; floats numbered m0..m8 by offset/4 this expands to the 3x3 determinant
  ; m0*m4*m8 + m1*m5*m6 + m2*m3*m7 - m0*m5*m7 - m2*m4*m6 - m1*m3*m8.
  %mul6 = fmul float %mul, %9
  %mul11 = fmul float %6, %1
  %mul14 = fmul float %mul11, %3
  %add = fadd float %mul6, %mul14
  %mul19 = fmul float %4, %7
  %10 = extractelement <2 x float> %8, i64 0
  %mul22 = fmul float %mul19, %10
  %add23 = fadd float %add, %mul22
  %mul28 = fmul float %5, %1
  %mul31 = fmul float %mul28, %10
  %sub = fsub float %add23, %mul31
  %mul36 = fmul float %0, %4
  %mul39 = fmul float %3, %mul36
 %sub40 = fsub float %sub, %mul39
  %mul45 = fmul float %6, %7
  %mul48 = fmul float %9, %mul45
  %sub49 = fsub float %sub40, %mul48
  ; Skip the stores entirely when %sub49 is ordered and equal to 0.0.
  %cmp = fcmp une float %sub49, 0.000000e+00
  br i1 %cmp, label %if.end, label %cleanup

if.end:                                           ; preds = %entry
  %sub96.i = fsub float %mul, %mul45
  %arrayidx9.i.i = getelementptr inbounds nuw i8, ptr %result, i64 16
  %arrayidx17.i.i = getelementptr inbounds nuw i8, ptr %result, i64 32
  ; First output vector: (%14 * %18) - (%23 * %26), divided lane-wise by %sub49.
  %11 = insertelement <4 x float> poison, float %0, i64 0
  %12 = insertelement <4 x float> %11, float %4, i64 1
  %13 = insertelement <4 x float> %12, float %mul11, i64 2
 %14 = insertelement <4 x float> %13, float %1, i64 3
  %15 = shufflevector <2 x float> %2, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 0>
 %16 = insertelement <4 x float> %15, float 1.000000e+00, i64 2
  %17 = shufflevector <2 x float> %8, <2 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %18 = shufflevector <4 x float> %17, <4 x float> %16, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  %19 = fmul <4 x float> %14, %18
  %20 = shufflevector <2 x float> %8, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %21 = insertelement <4 x float> %20, float %1, i64 0
  %22 = insertelement <4 x float> %21, float %mul36, i64 2
  %23 = shufflevector <4 x float> %22, <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 1>
  ; %24 uses the same constant operand and mask as the shufflevector in the
  ; reduced reproducer; lanes 1 and 3 are then filled by insertelement.
  %24 = shufflevector <4 x float> <float poison, float poison, float 1.000000e+00, float poison>, <4 x float> %15, <4 x i32> <i32 5, i32 poison, i32 2, i32 poison>
  %25 = insertelement <4 x float> %24, float %6, i64 1
  %26 = insertelement <4 x float> %25, float %7, i64 3
  %27 = fmul <4 x float> %23, %26
  %28 = fsub <4 x float> %19, %27
  ; %30 is %sub49 splatted to all four lanes (all-zero shuffle mask).
 %29 = insertelement <4 x float> poison, float %sub49, i64 0
  %30 = shufflevector <4 x float> %29, <4 x float> poison, <4 x i32> zeroinitializer
  %31 = fdiv <4 x float> %28, %30
  store <4 x float> %31, ptr %result, align 4
  ; Second output vector: (%35 * %37) - (%41 * %44), divided lane-wise by %sub49.
  %32 = insertelement <4 x float> poison, float %5, i64 0
  %33 = insertelement <4 x float> %32, float %mul19, i64 1
  %34 = insertelement <4 x float> %33, float %7, i64 2
  %35 = insertelement <4 x float> %34, float %6, i64 3
  %36 = shufflevector <2 x float> %2, <2 x float> %8, <4 x i32> <i32 3, i32 poison, i32 2, i32 0>
  %37 = insertelement <4 x float> %36, float 1.000000e+00, i64 1
  %38 = fmul <4 x float> %35, %37
  %39 = insertelement <4 x float> %15, float %mul28, i64 1
  %40 = insertelement <4 x float> %39, float %0, i64 2
  %41 = insertelement <4 x float> %40, float %5, i64 3
  %42 = insertelement <4 x float> %15, float 1.000000e+00, i64 1
  %43 = insertelement <4 x float> %42, float %4, i64 0
 %44 = shufflevector <4 x float> %43, <4 x float> %20, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  %45 = fmul <4 x float> %41, %44
  %46 = fsub <4 x float> %38, %45
  %47 = fdiv <4 x float> %46, %30
  store <4 x float> %47, ptr %arrayidx9.i.i, align 4
  ; Final scalar output at byte offset 32 of %result.
  %div.8 = fdiv float %sub96.i, %sub49
  store float %div.8, ptr %arrayidx17.i.i, align 4
  br label %cleanup

cleanup: ; preds = %if.end, %entry
  ; Same comparison as %cmp, recomputed here to form the return value.
  %48 = fcmp une float %sub49, 0.000000e+00
  ret i1 %48
}

``` 

This works on `x86_64-grtev4-linux-gnu` without `-march=x86-64-v2`. It looks like this has something to do with target features that are enabled by default on Android and on x86-64-v2.

From debug statements, compilation appears to be stuck in an infinite loop whose body looks like this:
```
Legalizing: t202: v4f32,ch = load<(load (s128) from constant-pool)> t0, t201, undef:i64
Legalizing non-extending load operation

Combining: t202: v4f32,ch = load<(load (s128) from constant-pool)> t0, t201, undef:i64

Legalizing: t37405: v4f32 = X86ISD::INSERTPS t202, t30, TargetConstant:i8<74>
Legal node: nothing to do

Combining: t37405: v4f32 = X86ISD::INSERTPS t202, t30, TargetConstant:i8<74>

Legalizing: t37404: i8 = TargetConstant<74>

Combining: t37404: i8 = TargetConstant<74>

Legalizing: t129: v4f32 = X86ISD::INSERTPS t37405, t128, TargetConstant:i8<16>
Legal node: nothing to do

Combining: t129: v4f32 = X86ISD::INSERTPS t37405, t128, TargetConstant:i8<16>

Legalizing: t117: v4f32 = X86ISD::INSERTPS t129, t116, TargetConstant:i8<48>
Legal node: nothing to do

Combining: t117: v4f32 = X86ISD::INSERTPS t129, t116, TargetConstant:i8<48>
Creating constant: t37406: i8 = TargetConstant<-27>
Creating new node: t37407: v4f32 = X86ISD::SHUFP t30, t202, TargetConstant:i8<-27>

Replacing.2 t37405: v4f32 = X86ISD::INSERTPS t202, t30, TargetConstant:i8<74>

With: t37407: v4f32 = X86ISD::SHUFP t30, t202, TargetConstant:i8<-27>

```

I was able to bisect it to #169857, but I am not sure whether that change is the actual culprit or simply unmasks a pre-existing issue.
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to