Issue 182784
Summary Missed `mov+dec` => `lea` fold
Labels new issue
Assignees
Reporter Validark
    [Zig Godbolt](https://zig.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:16,fontUsePx:'0',j:1,lang:zig,selection:(endColumn:2,endLineNumber:6,positionColumn:2,positionLineNumber:6,selectionStartColumn:2,selectionStartLineNumber:6,startColumn:2,startLineNumber:6),source:'export+fn+foo(x:+@Vector(32,+u8))+u64+%7B%0A++++const+mvmsk:+u32+%3D+@bitCast(%0A++++++++x+%3E%3D+@as(@Vector(32,+u8),+@splat(0x80))%0A++++)%3B%0A++++return+(@as(u32,+mvmsk)+-%25+1)+%7C+(@as(u64,+mvmsk)+%3C%3C+32)%3B%0A%7D'),l:'5',n:'0',o:'Zig+source+%231',t:'0')),k:50.222314428109726,l:'4',m:100,n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:ztrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:20,fontUsePx:'0',j:2,lang:zig,libs:!(),options:'-O+ReleaseFast+-target+x86_64-linux+-mcpu%3Dznver5+-fomit-frame-pointer',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:1),l:'5',n:'0',o:'+zig+trunk+(Editor+%231)',t:'0')),header:(),k:49.77768557189029,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',m:100,n:'0',o:'',t:'0')),version:4)

This code:

```zig
export fn foo(x: @Vector(32, u8)) u64 {
    const mvmsk: u32 = @bitCast(
        x >= @as(@Vector(32, u8), @splat(0x80))
    );
    return (@as(u32, mvmsk) -% 1) | (@as(u64, mvmsk) << 32);
}
```

Results in:

```asm
foo:
        vpmovmskb       ecx, ymm0
        mov     eax, ecx
        dec     eax
        shl     rcx, 32
        or      rax, rcx
        vzeroupper
        ret
```

Could be:

```asm
foo:
        vpmovmskb       ecx, ymm0
        lea     eax, [rcx - 1]
        shl     rcx, 32
        or      rax, rcx
        vzeroupper
        ret
```

Optimized LLVM IR:

```llvm
define dso_local i64 @foo(<32 x i8> %0) local_unnamed_addr {
Entry:
  %1 = icmp slt <32 x i8> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  %3 = add i32 %2, -1
  %4 = zext i32 %2 to i64
  %5 = shl nuw i64 %4, 32
  %6 = zext i32 %3 to i64
  %7 = or disjoint i64 %5, %6
  ret i64 %7
}
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to