Issue |
152900
|
Summary |
[AVX-512] Consider using `vpshufb(@splat(0xFF), x)` when selecting on `x < 0x80`
|
Labels |
new issue
|
Assignees |
|
Reporter |
Validark
|
For code like this: [Zig Godbolt](https://zig.godbo.lt/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAMzwBtMA7AQwFtMQByARg9KtQYEAysib0QXAEx8BBAKoBnTAAUAHpwAMvAFYTStJg1AAvPMFJL6yAngGVG6AMKpaAVxYMQ0hwBk8DTAA5dwAjTGIQADZSAAdUBUJbBmc3Dy9Y%2BMSBPwDgljCI6MtMayShAiZiAhT3T2li0oFyyoIcoNDwqIsKqpq0%2Bp7W/3b8zsiASgtUV2Jkdg5MVTiqgGoqBjXUVAh1FYABADUSomIISIAWUhXXAA5JlYBPEH2j6xIzy%2Bu78Zfj94urrcfgBSADsACFgRoAIIrOErYiYAgzDZ7BoECC3K6qFbAgDMjn2TAUEEOf1OAK%2B9zRMQMGI0qhuGnGkyhsPhHIepDZHI5e2JpNeJw%2BgLuVxpdIgzNZMI543xkJhYIAImy2YtlgQ1hsQpUds8yW8KZ8gVcnr8jSKqT9DcLKUDcRCefDEcjiBsHrjJJEVgA3GIKBCuKghUkC23/E1i/YKWlMemqABiiZZ2PleMV0JVaphaAYCi1BfQuLxyv2eBYmogwMkkiLNck6czeYLKxCrjoNg2%2BLLewrVYb7c7/gbTZz0JbWqEcgA4jPsEIACrYZUAfQO2Ecq6EAEkAFrYZ74xNoSs2Nir/xansrIsAOgSLHQD9cwGAmALQpIPkYwAICETEhHBiVxMRuK4h1oLs72QECxyVGF1j9AMgxDCAKhCcQVkMB4CAeGJMCufx8DmBRnhwvCCJ%2BPAqBWUkEmMTAAHkqFJRd8OY1jiLwUiWRLbAYzwRiWLYjiRIw%2BgWRtdiCJE7jeJWTBaCUfYZM49CmEwzAQSdWV4Roui%2B2SVAzwrTAIB%2BEgVmnOcF2XNcNy3XcDxLVVSxWBhXFoWgdMzXl4V9SoEQ/LyCANNS5IYEiPxBdzXCizAaACdAFWdfz%2BGIOiNDvO89hCQghCE9S9giriop4mKfgAehWO5HUcPAwUJME/P83lJxWYjFlczrytI4EAFZwUaga3NatqXRCqDBuGwayxvAyIC6nF8WwG8GSZH4NEU5TMBWCTMBm5avQGwz8uEIqRJKsTWIOviaruObUr0/zswQ9l/NdFFgoUULno%2Bx1VXejlFrRS7WOu2SyuihQ%2BNWwThIh0qNK0qTHXG3kvvdZDA2DUN70fZ9EQI%2BN6PB0SoaWvrKquO6iOp2H/o5N6sxezraLJxGKfUu7YoEmz5yXFd103bd92wO9j18tLeUCzKYhaMjcSGsGuchnnNMk6rrNnQX7JFpzxclvFEzmiNTgFuzhccsWD2NxNRVist4qwJLMBSjMZZBhhaGGTZMurb0FaqBQrmyu8dMcAAqBRaFQAgiKa9Gvba2P47vKOev9XG0IJitn0WAhiCYawUfoIiVkzy2hYc0XnIl48rmr/Wbfr%2B37jzp870L4vS/kj8K6r3Wrdrw27cbnXbJrg3bYbk2WSZtqWYmrHUXOxxiQxYOCEZz22ZZmWOrYf8MAUHqC2IVxrGTtmOUL8INiQvYG2830WDvRlIjvJhfVUAapDvChYMd4Qh3n/pIBsgpyRWlNBaO0UY%2BLmxgY9PeAM76qAIA/bU%2BwX60Dfh/G4X8f6qEkIAnOIDIFILxNIKk4ovynGoY7G09CICMKpIvfy99sZP1wfgz%2BD4FBKDxGQ1CIC7xSBuJQlhXBoi0Lge8GRTD5GnEUew1BzNQRjXHByVet4ADuhBkAIEMsjXmN80HwiQfaOqPZ4aLVPDEc8e17yLkqO%2BAgBCv5UEwPGGYmAhBIgABICkgtBWCrg7zeN8YiUOKxv6/3ASEPRfFj4IFPrlXh79%2BHEPASI4BoDwGQIOvTGGPwlIqT2A4ugOBiDEHeA2Rwyg5D7TcUiFYBhkAAGsz6/RiJqf2ONUIhEK
bWGUFi4RUJoQ6WxeIBL2JMo4syt4CDPlccQdxnjIk%2BLdP4oJISOxQX8DBECWzokDzicQxsPxUnpOfrWV%2BWTCHxJIXkqgFDaxl0Ir1UpO0KlVPoNgWp9TayNOaT0dx7SS7dNvK4PpJAtQZUGXjSQA1Iijm5LfSx0jZHTNLHYjmDinHLNWa0jx/Cok7ICQQYJJJQlHPCacnZsSBFCJSUiNJ6AFAZPuXgx5X9BFCNeWIiRRTNZfP7rDX5e1KkLOqYCuppwGlNJaestpHToW9P6Yi7OQyRWjIxeMqVrl4YysrHKoFpx7xUBYB4wlZllDECvIHSQO4GCBV9sWNx7hGBakontBWgj3b7VQEikMR4nSaMGo4BgDYriSwhOrcSYr5SaNTDLbMqCD4wg4JMWgnABq8E8BwLQpBUCcD3GYW80xZh7RrHiHgpAwrFpzZMTpIABqkNrPWgAnBobt1DCE3DxKi/QnBziFs0LwMtHBeBkQ0I2ydkw4CwCQP88I5BKBroiMYIu8VOl8E7OEMiEAQiTtIPlZgxAnjcF4BeyoDwmIhG0McTgDbTxsEEExH216S1YHbMADe3kyI3tIFgFghhgDiGbaBvAiJSi%2Bg/GexYJRXCYNfbwK8Skz2%2BxCMXK9zgsBnqLhWdDpAEPEBCPETAypMDgaML7Iwi6%2BAGGAAoA4eBMB6KYgRItDb%2BCCBEGIdgXBLj8fkEoNQZ7dBcH0BBkAphzA4bIpASYqBFkCGAwAWiYgAJSUj4pQiZN4rE0%2BCtpn9VwXE0wx1wOJNMsHCT2YwbrwinU0/wFghB3PFzYJpuImHiBTvI46rAymLIWH040TwEAHB9E8FIUgvhhh5AKBIGTcQEg2AEHFvQGWsgMDaClzooIItWCywwZovQXC1AkPUSL5XKtDFyB0CIXAZMFhaDl9rgxCstZAKCSYChq1zAkLm/NE7oPTpWAp/al8GCdLorgQgVk61cHGLwJtWhUxtvOKCO8oJQSRCZL2yIoIuD1ouJcPNHBx2kBYBIDQ86i0lunbOkA87NsttHRwSQE2XucA24uyY5GEh2HOEAA%3D%3D%3D)
```zig
/// Lane-wise select over 64 bytes: each output lane is `y[i]` when
/// `x[i] < 0x80` (i.e. the high bit of `x[i]` is clear) and 0 otherwise.
export fn foo(x: @Vector(64, u8), y: @Vector(64, u8)) @Vector(64, u8) {
    const Bytes = @Vector(64, u8);
    const threshold: Bytes = @splat(0x80);
    const zeros: Bytes = @splat(0);
    // Unsigned compare; true for lanes whose byte is in 0x00..0x7F.
    const keep = x < threshold;
    return @select(u8, keep, y, zeros);
}
```
LLVM version: ([Godbolt](https://llvm.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:llvm,selection:(endColumn:2,endLineNumber:6,positionColumn:2,positionLineNumber:6,selectionStartColumn:2,selectionStartLineNumber:6,startColumn:2,startLineNumber:6),source:'define+dso_local+%3C64+x+i8%3E+@foo(%3C64+x+i8%3E+%250,+%3C64+x+i8%3E+%251)+local_unnamed_addr+%7B%0AEntry:%0A++%25.inv+%3D+icmp+slt+%3C64+x+i8%3E+%250,+zeroinitializer%0A++%252+%3D+select+%3C64+x+i1%3E+%25.inv,+%3C64+x+i8%3E+zeroinitializer,+%3C64+x+i8%3E+%251%0A++ret+%3C64+x+i8%3E+%252%0A%7D'),l:'5',n:'0',o:'LLVM+IR+source+%231',t:'0')),k:50.91543156059285,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:llctrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:1,lang:llvm,libs:!(),options:'-O3+--mtriple%3Dx86_64-linux+-mcpu%3Dznver5',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:1),l:'5',n:'0',o:'+llc+(trunk)+(Editor+%231)',t:'0')),k:49.08456843940715,l:'4',n:'0',o:'',s:0,t:'0')),l:'2',m:100,n:'0',o:'',t:'0')),version:4))
```llvm
; LLVM IR for the Zig `foo` above: %0 corresponds to `x`, %1 to `y`.
define dso_local <64 x i8> @foo(<64 x i8> %0, <64 x i8> %1) local_unnamed_addr {
Entry:
; Signed `%0 < 0` is true exactly for lanes whose high bit is set, i.e. the inverse of unsigned `x < 0x80`.
%.inv = icmp slt <64 x i8> %0, zeroinitializer
; Inverted condition, so the arms are swapped: 0 where the high bit is set, `y` otherwise.
%2 = select <64 x i1> %.inv, <64 x i8> zeroinitializer, <64 x i8> %1
ret <64 x i8> %2
}
```
We used to get:
```asm
# Old codegen: round-trip through a mask register, then AND-NOT with y.
vpmovb2m k0, zmm0          # k0[i] = high bit of x[i]
vpmovm2b zmm0, k0          # zmm0[i] = 0xFF if k0[i] else 0x00
vpandnq zmm0, zmm0, zmm1   # zmm0 = ~zmm0 & y  -> y where x < 0x80, else 0
```
Now we get:
```asm
# Current codegen: invert the sign-bit mask and do a zero-masked byte move of y.
vpmovb2m k0, zmm0                 # k0[i] = high bit of x[i]
knotq k1, k0                      # k1[i] = 1 where x[i] < 0x80
vmovdqu8 zmm0 {k1} {z}, zmm1      # zmm0[i] = y[i] if k1[i] else 0
```
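However, I thought it might be a good idea in some situations to use `vpshufb` with an all-ones table instead. `vpshufb` zeroes every output byte whose index byte has its high bit set, so shuffling a vector of `0xFF` bytes by `x` yields `0xFF` where `x < 0x80` and `0` elsewhere; ANDing that with `y` gives the same result: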
```asm
# Proposed codegen: no mask registers; vpshufb itself produces the byte mask.
vpternlogd zmm2, zmm2, zmm2, 255   # zmm2 = all ones (truth table 0xFF ignores the inputs)
vpshufb zmm0, zmm2, zmm0           # index byte with high bit set -> 0; otherwise picks a table byte, all of which are 0xFF
vpandq zmm0, zmm0, zmm1            # zmm0 = mask & y  -> y where x < 0x80, else 0
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs