https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115146

            Bug ID: 115146
           Summary: [15 Regression] Incorrect 8-byte vectorization:
                    psllw/psraw confusion
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: slyfox at gcc dot gnu.org
  Target Milestone: ---

Initially observed as a test failure in highway-1.0.7 when built with the
r15-644-g7422e050f33dd9 compiler:
    HwyReverseTestGroup/HwyReverseTest.TestAllReverseLaneBytes/EMU128 FAILED

Self-contained example:

// $ cat bug.c
typedef unsigned char u8;

// Store the test pattern into the first two bytes of `src`:
// src[0] = 0x00 and src[1] = 0xff. No other bytes are written.
// Marked noipa so the stores stay behind a real call and are not
// propagated into main() as constants.
__attribute__((noipa))
static void fill_src(u8 * src) {
    src[0] = 0x00; src[1] = 0xff;
}

// Check the first two bytes of `dst` and abort via __builtin_trap()
// unless dst[0] == 0xff and dst[1] == 0x00, i.e. the byte-swapped form
// of the pattern written by fill_src(). Only these two bytes are read.
// Marked noipa so the check is performed through a real call on the
// bytes actually stored in memory.
__attribute__((noipa))
static void assert_dst(const u8 * dst) {
    if (dst[0] != 0xff) __builtin_trap();   // low byte must be the old src[1]
    if (dst[1] != 0x00) __builtin_trap();   // high byte must be the old src[0]
}

// Reproducer driver: fills `src` with a known 2-byte pattern, swaps the
// bytes of every adjacent pair from `src` into `dst`, then verifies the
// first pair. The program exits normally when the swap is correct and
// traps inside assert_dst() when it is not.
int main() {
    // Both buffers are 8 bytes, zero-initialised and 16-byte aligned.
    u8 src[8] __attribute__((aligned(16))) = { 0 };
    u8 dst[8] __attribute__((aligned(16))) = { 0 };

    // place 0x00 into src[0] and 0xFF into src[1]
    fill_src(src);

    // swap bytes:
    // place 0xFF into dst[0], 0x00 into dst[1]
    // The loop handles four 2-byte pairs (i = 0, 2, 4, 6); this is the
    // loop the report says is miscompiled at -O2.
    for (unsigned long i = 0; i < 8; i += 2) {
        dst[i + 0] = src[i + 1];
        dst[i + 1] = src[i + 0];
    }

    // make sure bytes swapped
    assert_dst(dst);
}

Triggering:

$ gcc bug.c -o a -O1 && ./a
$ gcc bug.c -o a -O2 && ./a
Illegal instruction (core dumped)

A bit of analysis:

Dump of assembler code for function main:
   0x0000000000401030 <+0>:     sub    $0x28,%rsp
   0x0000000000401034 <+4>:     mov    %rsp,%rdi
   0x0000000000401037 <+7>:     movq   $0x0,(%rsp)
   0x000000000040103f <+15>:    movq   $0x0,0x10(%rsp)
   0x0000000000401048 <+24>:    call   0x401170 <fill_src>
   0x000000000040104d <+29>:    movq   (%rsp),%xmm0
   0x0000000000401052 <+34>:    lea    0x10(%rsp),%rdi
   0x0000000000401057 <+39>:    movdqa %xmm0,%xmm1
   0x000000000040105b <+43>:    psllw  $0x8,%xmm0
   0x0000000000401060 <+48>:    psraw  $0x8,%xmm1 ; <<<- why arithmetic shift? should be psrlw
   0x0000000000401065 <+53>:    por    %xmm0,%xmm1
   0x0000000000401069 <+57>:    movq   %xmm1,0x10(%rsp)
   0x000000000040106f <+63>:    call   0x401180 <assert_dst>
   0x0000000000401074 <+68>:    xor    %eax,%eax
   0x0000000000401076 <+70>:    add    $0x28,%rsp
   0x000000000040107a <+74>:    ret
End of assembler dump.

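Here `psraw` (arithmetic right shift) should have been `psrlw` (logical right
shift) to avoid sign-bit extension: with src[1] = 0xff each 16-bit lane has its
top bit set, so `psraw $8` fills the upper byte with ones instead of zeros and
the subsequent `por` yields dst[1] = 0xff rather than 0x00.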

$ gcc -v
Using built-in specs.
COLLECT_GCC=/<<NIX>>/gcc-15.0.0/bin/gcc
COLLECT_LTO_WRAPPER=/<<NIX>>/gcc-15.0.0/libexec/gcc/x86_64-unknown-linux-gnu/15.0.0/lto-wrapper
Target: x86_64-unknown-linux-gnu
Configured with: ../source/configure --prefix=/<<NIX>>/gcc-15.0.0
--with-gmp-include=/<<NIX>>/gmp-6.3.0-dev/include
--with-gmp-lib=/<<NIX>>/gmp-6.3.0/lib
--with-mpfr-include=/<<NIX>>/mpfr-4.2.1-dev/include
--with-mpfr-lib=/<<NIX>>/mpfr-4.2.1/lib --with-mpc=/<<NIX>>/libmpc-1.3.1
--with-native-system-header-dir=/<<NIX>>/glibc-2.39-52-dev/include
--with-build-sysroot=/
--with-gxx-include-dir=/<<NIX>>/gcc-15.0.0/include/c++/15.0.0/
--program-prefix= --enable-lto --disable-libstdcxx-pch
--without-included-gettext --with-system-zlib --enable-checking=release
--enable-static --enable-languages=c,c++ --disable-multilib --enable-plugin
--disable-libcc1 --with-isl=/<<NIX>>/isl-0.20 --disable-bootstrap
--build=x86_64-unknown-linux-gnu --host=x86_64-unknown-linux-gnu
--target=x86_64-unknown-linux-gnu
Thread model: posix
Supported LTO compression algorithms: zlib
gcc version 15.0.0 99999999 (experimental) (GCC)

Reply via email to