Issue 181529
Summary Spurious stack store not eliminated
Labels new issue
Assignees
Reporter Validark
    [Zig Godbolt](https://zig.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:zig,selection:(endColumn:2,endLineNumber:14,positionColumn:1,positionLineNumber:1,selectionStartColumn:2,selectionStartLineNumber:14,startColumn:1,startLineNumber:1),source:'fn+cast_to_u6(x:+u7)+%3Fu6+%7B%0A++++if+(x+%3E+63)+%7B%0A++++++++return+null%3B%0A++++%7D+else+%7B%0A++++++++return+@as(u6,+@intCast(x))%3B%0A++++%7D%0A%7D%0A%0Aexport+fn+foo(x:+u64)+u64+%7B%0A++++return+if+(cast_to_u6(@clz(x)))+%7Clzcnt%7C%0A++++++++(@as(u64,+1)+%3C%3C+63)+%3E%3E+lzcnt%0A++++else%0A++++++++0%3B%0A%7D%0A%0Afn+bar(x:+u64)+u64+%7B%0A++++return+if+(@clz(x)+%3D%3D+64)+0+else+%0A++++++++(@as(u64,+1)+%3C%3C+63)+%3E%3E+@intCast(@clz(x))%3B%0A%7D%0A'),l:'5',n:'0',o:'Zig+source+%231',t:'0')),k:50.7503111948703,l:'4',m:100,n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:ztrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:20,fontUsePx:'0',j:2,lang:zig,libs:!(),options:'-O+ReleaseFast+-target+x86_64-linux+-mcpu%3Dznver5+-fomit-frame-pointer',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:1),l:'5',n:'0',o:'+zig+trunk+(Editor+%231)',t:'0')),header:(),k:49.249688805129715,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',m:100,n:'0',o:'',t:'0')),version:4)
```zig
/// Narrows a `u7` to a `u6`.
/// Returns `null` when `x` is greater than 63 (does not fit in 6 bits),
/// otherwise returns `x` unchanged as a `u6`.
fn cast_to_u6(x: u7) ?u6 {
    if (x > 63) {
        // Out of range for u6: report failure through the optional.
        return null;
    } else {
        // On this path x <= 63, so the narrowing cast keeps the value intact.
        return @as(u6, @intCast(x));
    }
}

/// Reproducer entry point.
/// Computes `(1 << 63) >> @clz(x)` when the leading-zero count fits in a
/// `u6` (i.e. is at most 63), and returns 0 when `cast_to_u6` yields `null`.
export fn foo(x: u64) u64 {
    return if (cast_to_u6(@clz(x))) |lzcnt|
        // `lzcnt` is the unwrapped u6 payload, used as the shift amount.
        (@as(u64, 1) << 63) >> lzcnt
    else
        0;
}
```

LLVM unoptimized dump:

```llvm
; ModuleID = 'main'
source_filename = "main"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux5.10.0-musl"

@0 = private unnamed_addr constant { i6, i8 } { i6 undef, i8 0 }, align 1
@builtin.zig_backend = internal unnamed_addr constant i64 2, align 8
@start.simplified_logic = internal unnamed_addr constant i1 false, align 1
@builtin.output_mode = internal unnamed_addr constant i2 -2, align 1

; Unoptimized lowering of the Zig `foo`: calls @main.cast_to_u6 with the
; leading-zero count of %0 and inspects the { i6, i8 } optional it writes back.
; Function Attrs: nounwind uwtable
define dso_local i64 @foo(i64 %0) #0 {
1:
  ; Stack slot for the optional result; passed to the callee as its sret pointer.
  %2 = alloca { i6, i8 }, align 1
  ; Second operand `i1 false`: a zero input is not poison for this ctlz call.
  %3 = call i64 @llvm.ctlz.i64(i64 %0, i1 false)
  %4 = trunc i64 %3 to i7
  call fastcc void @main.cast_to_u6(ptr sret({ i6, i8 }) %2, i7 %4)
  ; Field 1 is the "has value" byte: the callee writes 1 for a payload, 0 for null.
  %5 = getelementptr inbounds { i6, i8 }, ptr %2, i32 0, i32 1
  %6 = load i8, ptr %5
  %7 = icmp ne i8 %6, 0
  ; Non-null -> block 10, null -> block 16.
  br i1 %7, label %10, label %16

8:
  ; Merge point: shifted value from block 10, or constant 0 from block 16.
  %9 = phi i64 [ %15, %10 ], [ 0, %16 ]
  ret i64 %9

10:
  ; Field 0 is the payload. It is read here as an i8 and truncated to i6,
  ; whereas the callee stores it as an i6.
 %11 = getelementptr inbounds { i6, i8 }, ptr %2, i32 0, i32 0
  %12 = load i8, ptr %11, align 1
  %13 = trunc i8 %12 to i6
  %14 = zext i6 %13 to i64
  ; -9223372036854775808 is the i64 bit pattern with only bit 63 set (1 << 63).
  %15 = lshr i64 -9223372036854775808, %14
  br label %8

16:
  ; Null path: contributes the constant 0 to the phi in block 8.
  br label %8
}

; Function Attrs: nounwind speculatable willreturn nofree nosync nocallback memory(none)
declare i64 @llvm.ctlz.i64(i64 %0, i1 immarg %1) #1

; Unoptimized lowering of the Zig `cast_to_u6`: writes a { i6, i8 } optional
; through the sret pointer %0 (field 0 = payload, field 1 = "has value" byte).
; Function Attrs: nounwind uwtable
define internal fastcc void @main.cast_to_u6(ptr noalias sret({ i6, i8 }) nonnull %0, i7 %1) unnamed_addr #0 {
2:
  ; %1 > 63 -> block 4 (null result), otherwise block 5 (payload result).
  %3 = icmp ugt i7 %1, 63
  br i1 %3, label %4, label %5

4:
  ; Copy the 2-byte constant @0 ({ i6 undef, i8 0 }) into the result: null.
 call void @llvm.memcpy.p0.p0.i64(ptr align 1 %0, ptr align 1 @0, i64 2, i1 false)
  ret void

5:
  ; Tell the optimizer that %1 <= 63 on this path before narrowing to i6.
  %6 = icmp ule i7 %1, 63
  call void @llvm.assume(i1 %6)
  %7 = trunc i7 %1 to i6
  ; Store the payload into field 0.
  %8 = getelementptr inbounds { i6, i8 }, ptr %0, i32 0, i32 0
  store i6 %7, ptr %8, align 1
  ; Store 1 into field 1 to mark the optional as non-null.
  %9 = getelementptr inbounds { i6, i8 }, ptr %0, i32 0, i32 1
  store i8 1, ptr %9
  ret void
}

; Function Attrs: nounwind willreturn nofree nocallback memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly %0, ptr noalias nocapture readonly %1, i64 %2, i1 immarg %3) #2

; Function Attrs: nounwind willreturn nofree nosync nocallback memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef %0) #3

attributes #0 = { nounwind uwtable "frame-pointer"="all" "target-cpu"="znver5" "target-features"="+64bit,+adx,+aes,+allow-light-256-bit,+avx,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vp2intersect,+avx512vpopcntdq,+avxvnni,+bmi,+bmi2,+branchfusion,+clflushopt,+clwb,+clzero,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fast-15bytenop,+fast-bextr,+fast-dpwssd,+fast-imm16,+fast-lzcnt,+fast-movbe,+fast-scalar-fsqrt,+fast-scalar-shift-masks,+fast-variable-perlane-shuffle,+fast-vector-fsqrt,+fma,+fsgsbase,+fsrm,+fxsr,+gfni,+idivq-to-divl,+invpcid,+lzcnt,+macrofusion,+mmx,+movbe,+movdir64b,+movdiri,+mwaitx,+nopl,+pclmul,+pku,+popcnt,+prefetchi,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sbb-dep-breaking,+sha,+shstk,+slow-shld,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+vaes,+vpclmulqdq,+vzeroupper,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves,-16bit-mode,-32bit-mode,-amx-avx512,-amx-bf16,-amx-complex,-amx-fp16,-amx-fp8,-amx-int8,-amx-movrs,-amx-tf32,-amx-tile,-amx-transpose,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx512fp16,-avxifma,-avxneconvert,-avxvnniint16,-avxvnniint8,-branch-hint,-ccmp,-cf,-cldemote,-cmpccxadd,-egpr,-enqcmd,-ermsb,-false-deps-getmant,-false-deps-lzcnt-tzcnt,-false-deps-mulc,-false-deps-mullq,-false-deps-perm,-false-deps-popcnt,-false-deps-range,-fast-11bytenop,-fast-7bytenop,-fast-gather,-fast-hops,-fast-shld-rotate,-fast-variable-crosslane-shuffle,-fast-vector-shift-masks,-faster-shift-than-shuffle,-fma4,-harden-sls-ijmp,-harden-sls-ret,-hreset,-idivl-to-divb,-inline-asm-use-gpr32,-kl,-lea-sp,-lea-uses-ag,-lvi-cfi,-lvi-load-hardening,-lwp,-movrs,-ndd,-nf,-no-bypass-delay,-no-bypass-delay-blend,-no-bypass-delay-mov,-no-bypass-delay-shuffle,-pad-short-functions,-pconfig,-ppx,-prefer-128-bit,-prefer-256-bit,-prefer-mask-registers,-prefer-movmsk-over-vtest,-prefer-no-gather,-prefer-no-scatter,-ptwrite,-push2pop2,-raoint,-retpoline,-retpoline-external-thunk,-retpoline-indirect-branches,-retpoline-indirect-calls,-rtm,-serialize,-seses,-sgx,-sha512,-slow-3ops-lea,-slow-incdec,-slow-lea,-slow-pmaddwd,-slow-pmulld,-slow-two-mem-ops,-slow-unaligned-mem-16,-slow-unaligned-mem-32,-sm3,-sm4,-soft-float,-sse-unaligned-mem,-tagged-globals,-tbm,-tsxldtrk,-tuning-fast-imm-vector-shift,-uintr,-use-glm-div-sqrt-costs,-use-slm-arith-costs,-usermsr,-waitpkg,-widekl,-xop,-zu" }
attributes #1 = { nounwind speculatable willreturn nofree nosync nocallback memory(none) }
attributes #2 = { nounwind willreturn nofree nocallback memory(argmem: readwrite) }
attributes #3 = { nounwind willreturn nofree nosync nocallback memory(inaccessiblemem: write) }

!llvm.module.flags = !{}
```

Emitted assembly (note the dead store to `[rsp - 1]`, which is never read back):

```asm
# Code currently generated for `foo` (argument x in rdi, result in rax).
foo:
        # x == 0 is handled by a branch to .LBB0_1, which returns 0.
        test rdi, rdi
        je      .LBB0_1
        # rcx = number of leading zero bits in x.
        lzcnt   rcx, rdi
        # rax = 1 << 63, then shifted right by the leading-zero count.
        movabs rax, -9223372036854775808
        shrx    rax, rax, rcx
        # The issue: the count is masked to 6 bits and stored just below the
        # stack pointer, but nothing reads that byte before the function returns.
        and     cl, 63
        mov     byte ptr [rsp - 1], cl
        ret
.LBB0_1:
        # Zero-input path: return 0.
 xor     eax, eax
        ret

Expected assembly (branchless, with no stack store):

```asm
# Desired code for `foo` (argument x in rdi, result in rax): no branch, no store.
foo:
        # rax = leading-zero count of x; LZCNT sets CF when the source is zero.
 lzcnt   rax, rdi
        # rcx = 1 << 63.
        movabs  rcx, -9223372036854775808
        # rax = rcx >> rax. Neither MOV nor SHRX modifies the flags, so CF from
        # LZCNT is still valid afterwards.
        shrx rax, rcx, rax
        # If CF is set (x == 0), replace the result with rdi, which is 0.
        cmovb   rax, rdi
        ret
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to