https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113345
Bug ID: 113345
Summary: missed optimization for psign{b,w,d}.
Product: gcc
Version: 14.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: liuhongt at gcc dot gnu.org
Target Milestone: ---
/* Conditional negate over a fixed-size array.

   For each of the first 1000 elements, store -b[i] into a[i] when c[i] is
   negative; otherwise (c[i] zero or positive) copy b[i] unchanged.

   a is declared __restrict, so the caller must not pass an output buffer
   that overlaps b or c.  All three arrays must hold at least 1000 shorts.  */
void
foo (short* __restrict a, short* b, short* c)
{
  int idx = 0;

  while (idx != 1000)
    {
      const short src = b[idx];
      /* Only a strictly negative selector flips the sign.  */
      const int negate = c[idx] < 0;

      a[idx] = negate ? -src : src;
      idx++;
    }
}
gcc -O2 -mavx2
# Compiler output for the loop (AT&T syntax: sources first, destination last).
# NOTE(review): this listing uses byte-element instructions (vpsubb/vpcmpgtb)
# and a char* signature, while the C reproducer is declared with short*;
# presumably it was captured from a char variant of the test case - confirm.
foo(char*, char*, char*):
# rax = byte offset into the three arrays, starting at 0.
xorl %eax, %eax
# xmm2 = all zeros; used below both as the minuend and as the compare operand.
vpxor %xmm2, %xmm2, %xmm2
.L2:
# Load 8 bytes from (rsi + rax) into xmm0 and from (rdx + rax) into xmm1.
vmovq (%rsi,%rax), %xmm0
vmovq (%rdx,%rax), %xmm1
# xmm3 = 0 - xmm0, i.e. the byte-wise negation of the loaded values.
vpsubb %xmm0, %xmm2, %xmm3
# xmm1 = byte-wise mask of (0 > xmm1): all ones where the selector is negative.
vpcmpgtb %xmm1, %xmm2, %xmm1
# xmm0 = mask ? xmm3 (negated) : xmm0 (original), selected per byte.
vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
# Store the 8 result bytes to (rdi + rax).
vmovq %xmm0, (%rdi,%rax)
# Advance by 8 bytes and loop until offset 1000 is reached.
addq $8, %rax
cmpq $1000, %rax
jne .L2
ret
it can be optimized with psignw.
22115(define_insn "<ssse3_avx2>_psign<mode>3"
22116 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
22117 (unspec:VI124_AVX2
22118 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
22119 (match_operand:VI124_AVX2 2 "vector_operand" "xja,xjm")]
22120 UNSPEC_PSIGN))]
Maybe we can just refactor the pattern as below; then combine can generate the
pattern for us.
22115(define_insn "<ssse3_avx2>_psign<mode>3"
22116 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
22117 (unspec:VI124_AVX2
22118 [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
(neg:VI124_AVX2 (match_dup 1))
22119 (match_operand:VI124_AVX2 2 "vector_operand" "xja,xjm")]
22120 UNSPEC_PBLENDV))]