https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117839
Bug ID: 117839
Summary: Redundant vector XOR instructions
Product: gcc
Version: 15.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: hjl.tools at gmail dot com
Target Milestone: ---
Target: x86-64
[hjl@gnu-tgl-3 zero-1]$ cat z.c
#include <stddef.h>
#include <string.h>
/* Reduced test case: zero parts of the buffer at MEM with a handful of
   word-sized stores at fixed and NCLEARS-relative offsets.

   MEM is treated as an array of size_t words.  CLEARSIZE is a byte
   count; it is divided by sizeof (size_t) to get the word count NCLEARS.

   Returns NCLEARS, implicitly converted from size_t to float by the
   declared return type.

   NOTE(review): the exact statement shape (including the missing
   `else' after the memset call and the float return type) is presumably
   what triggers the reported code generation -- do not "clean up"
   without re-checking the compiler output.  */
float
clear_memory (void *mem, size_t clearsize)
{
/* Unroll clear memory size up to 9 * size_t bytes. We know
that contents have an odd number of size_t-sized words;
minimally 3 words. */
size_t *d = (size_t *) mem;
size_t nclears = clearsize / sizeof (size_t);
/* Large buffers are cleared by memset.  There is no `else' or early
   return here, so the explicit stores below are executed in this case
   as well.  */
if (nclears > 17)
memset (mem, 0, clearsize);
/* Use overlapping stores with 2 branches, instead of up to 6. */
/* Words 0..2 are cleared unconditionally.  */
*(d + 0) = 0;
*(d + 1) = 0;
*(d + 2) = 0;
if (nclears > 9)
{
/* Clear words 5..8, then the four words starting 8 words before the
   end (nclears - 8 .. nclears - 5).  */
*(d + 5) = 0;
*(d + 5 + 1) = 0;
*(d + 5 + 2) = 0;
*(d + 5 + 3) = 0;
*(d + nclears - 8) = 0;
*(d + nclears - 8 + 1) = 0;
*(d + nclears - 8 + 2) = 0;
*(d + nclears - 8 + 3) = 0;
}
else
{
/* Clear words 1..4 (words 1 and 2 are stored a second time), then the
   last four words (nclears - 4 .. nclears - 1).
   NOTE(review): for nclears < 4 the address d + nclears - 4 lies before
   d, and the store to d + 4 needs at least 5 words -- presumably the
   caller guarantees a large enough buffer; confirm.  */
*(d + 1) = 0;
*(d + 2) = 0;
*(d + 3) = 0;
*(d + 4) = 0;
*(d + nclears - 4) = 0;
*(d + nclears - 4 + 1) = 0;
*(d + nclears - 4 + 2) = 0;
*(d + nclears - 4 + 3) = 0;
}
/* size_t -> float conversion happens here because of the return type.  */
return nclears;
}
[hjl@gnu-tgl-3 zero-1]$ make y.s
/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/tools-build/gcc-gitlab-debug/build-x86_64-linux/gcc/ -O2
-march=x86-64-v3 -S y.c
[hjl@gnu-tgl-3 zero-1]$ grep xor y.s
vpxor %xmm0, %xmm0, %xmm0
vxorps %xmm0, %xmm0, %xmm0
vpxor %xmm0, %xmm0, %xmm0
vxorps %xmm0, %xmm0, %xmm0
xorl %esi, %esi
vpxor %xmm0, %xmm0, %xmm0
vpxor %xmm0, %xmm0, %xmm0
vxorps %xmm0, %xmm0, %xmm0
[hjl@gnu-tgl-3 zero-1]$
There are 7 vector XOR instructions, all zeroing %xmm0, but one is sufficient.