Hi, this patch implements the byte swap of a V8HImode vector as an element rotate by 8 bits (verllh) when VXE2 is not available, instead of going through a vector permute.
Bootstrapped and regtested, no regressions. Is it OK? Regards Robin gcc/ChangeLog: PR target/100867 * config/s390/vector.md: Add special case for V8HImode. gcc/testsuite/ChangeLog: * gcc.target/s390/zvector/vec-revb-short.c: New test. --- gcc/config/s390/vector.md | 35 ++++++++++++------- .../gcc.target/s390/zvector/vec-revb-short.c | 13 +++++++ 2 files changed, 35 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-revb-short.c diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 2207f39b80e4..6f46bed03e00 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -2898,22 +2898,31 @@ (define_expand "bswap<mode>" for (int i = 0; i < 16; i++) perm_rtx[i] = GEN_INT (perm[i]); - operands[2] = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm_rtx)); - - /* Without vxe2 we do not have byte swap instructions dealing - directly with memory operands. So instead of waiting until - reload to fix that up switch over to vector permute right - now. */ - if (!TARGET_VXE2) + if (!TARGET_VXE2 && <MODE>mode == V8HImode) { - rtx in = force_reg (V16QImode, simplify_gen_subreg (V16QImode, operands[1], <MODE>mode, 0)); - rtx permute = force_reg (V16QImode, force_const_mem (V16QImode, operands[2])); - rtx out = gen_reg_rtx (V16QImode); - - emit_insn (gen_vec_permv16qi (out, in, in, permute)); - emit_move_insn (operands[0], simplify_gen_subreg (<MODE>mode, out, V16QImode, 0)); + /* A byte swap for a short is just a rotate by 8 bits. */ + emit_insn (gen_rotlv8hi3 (operands[0], operands[1], GEN_INT (8))); DONE; } + else + { + operands[2] = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm_rtx)); + + /* Without vxe2 we do not have byte swap instructions dealing + directly with memory operands. So instead of waiting until + reload to fix that up switch over to vector permute right + now. 
*/ + if (!TARGET_VXE2) + { + rtx in = force_reg (V16QImode, simplify_gen_subreg (V16QImode, operands[1], <MODE>mode, 0)); + rtx permute = force_reg (V16QImode, force_const_mem (V16QImode, operands[2])); + rtx out = gen_reg_rtx (V16QImode); + + emit_insn (gen_vec_permv16qi (out, in, in, permute)); + emit_move_insn (operands[0], simplify_gen_subreg (<MODE>mode, out, V16QImode, 0)); + DONE; + } + } }) ; Switching late to the reg-reg variant requires the vector permute diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-revb-short.c b/gcc/testsuite/gcc.target/s390/zvector/vec-revb-short.c new file mode 100644 index 000000000000..bf58a0e12e74 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/zvector/vec-revb-short.c @@ -0,0 +1,13 @@ +/* Test that we use verllh for byte reversing a vector of shorts. */ +/* { dg-do compile { target { s390*-*-* } } } */ +/* { dg-options "-O2 -march=z13 -mzvector -mzarch -fno-unroll-loops" } */ + +/* { dg-final { scan-assembler-times "verllh\t" 1 } } */ +/* { dg-final { scan-assembler-not "vperm" } } */ + +#include <vecintrin.h> + +vector short revb (vector short a) +{ + return vec_revb (a); +} -- 2.31.1