Exploit the fact that the VLGV instruction zeros the excess bits of a
GPR, i.e., extracting a vector element into a GPR already performs an
implicit zero extend.  Add patterns so that a subsequent explicit zero
extend of the extracted element can be dropped.
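For illustration only (not part of the patch): a minimal sketch of the
kind of code that benefits, assuming -O2 and a vector-enabled target
such as -march=z13; the typedef and function name below are made up for
the example.

    typedef unsigned int __attribute__ ((vector_size (16))) v4si;

    unsigned long
    extract_elem_1 (v4si x)
    {
      /* Element extraction widened to 64 bit.  Previously this would
         typically be a vlgvf followed by a separate zero extend
         (e.g. llgfr); with the new patterns the vlgvf alone is expected
         to suffice, since the instruction itself clears the remaining
         bits of the GPR.  */
      return x[1];
    }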
gcc/ChangeLog:

	* config/s390/vector.md (bhfgq): Add scalar modes.
	(*movdi<mode>_zero_extend_A): New insn.
	(*movsi<mode>_zero_extend_A): New insn.
	(*movdi<mode>_zero_extend_B): New insn.
	(*movsi<mode>_zero_extend_B): New insn.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/vector/vlgv-zero-extend-1.c: New test.
---
 gcc/config/s390/vector.md                     | 60 ++++++++++++++--
 .../s390/vector/vlgv-zero-extend-1.c          | 71 +++++++++++++++++++
 2 files changed, 125 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vlgv-zero-extend-1.c

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index c63360f8bcc..12bbeb64072 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -149,13 +149,13 @@
 
 ; The instruction suffix for integer instructions and instructions
 ; which do not care about whether it is floating point or integer.
-(define_mode_attr bhfgq[(V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b")
-                        (V1HI "h") (V2HI "h") (V4HI "h") (V8HI "h")
-                        (V1SI "f") (V2SI "f") (V4SI "f")
-                        (V1DI "g") (V2DI "g")
+(define_mode_attr bhfgq[(V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b") (QI "b")
+                        (V1HI "h") (V2HI "h") (V4HI "h") (V8HI "h") (HI "h")
+                        (V1SI "f") (V2SI "f") (V4SI "f") (SI "f")
+                        (V1DI "g") (V2DI "g") (DI "g")
                         (V1TI "q") (TI "q")
-                        (V1SF "f") (V2SF "f") (V4SF "f")
-                        (V1DF "g") (V2DF "g")
+                        (V1SF "f") (V2SF "f") (V4SF "f") (SF "f")
+                        (V1DF "g") (V2DF "g") (DF "g")
                         (V1TF "q") (TF "q")])
 
 ; This is for vmalhw. It gets an 'w' attached to avoid confusion with
@@ -501,6 +501,54 @@
                              SIL,SIL,RI,RI,RRE,RRE,RIL,RR,RXY,RXY,RIL")])
 
 
+; Instructions vlgvb, vlgvh, vlgvf zero all remaining bits of a GPR, i.e.,
+; an implicit zero extend is done.
+
+(define_insn "*movdi<mode>_zero_extend_A"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+        (zero_extend:DI (match_operand:SINT 1 "register_operand" "v")))]
+  "TARGET_VX"
+  "vlgv<bhfgq>\t%0,%v1,0"
+  [(set_attr "op_type" "VRS")])
+
+(define_insn "*movsi<mode>_zero_extend_A"
+  [(set (match_operand:SI 0 "register_operand" "=d")
+        (zero_extend:SI (match_operand:HQI 1 "register_operand" "v")))]
+  "TARGET_VX"
+  "vlgv<bhfgq>\t%0,%v1,0"
+  [(set_attr "op_type" "VRS")])
+
+(define_mode_iterator VLGV_DI [V1QI V2QI V4QI V8QI V16QI
+                               V1HI V2HI V4HI V8HI
+                               V1SI V2SI V4SI])
+(define_insn "*movdi<mode>_zero_extend_B"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+        (zero_extend:DI (vec_select:<non_vec>
+                          (match_operand:VLGV_DI 1 "register_operand" "v")
+                          (parallel [(match_operand:SI 2 "const_int_operand" "n")]))))]
+  "TARGET_VX"
+{
+  operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
+  return "vlgv<bhfgq>\t%0,%v1,%Y2";
+}
+  [(set_attr "op_type" "VRS")
+   (set_attr "mnemonic" "vlgv<bhfgq>")])
+
+(define_mode_iterator VLGV_SI [V1QI V2QI V4QI V8QI V16QI
+                               V1HI V2HI V4HI V8HI])
+(define_insn "*movsi<mode>_zero_extend_B"
+  [(set (match_operand:SI 0 "register_operand" "=d")
+        (zero_extend:SI (vec_select:<non_vec>
+                          (match_operand:VLGV_SI 1 "register_operand" "v")
+                          (parallel [(match_operand:SI 2 "const_int_operand" "n")]))))]
+  "TARGET_VX"
+{
+  operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
+  return "vlgv<bhfgq>\t%0,%v1,%Y2";
+}
+  [(set_attr "op_type" "VRS")
+   (set_attr "mnemonic" "vlgv<bhfgq>")])
+
 ; vec_load_lanes?
 ; vec_store_lanes?
diff --git a/gcc/testsuite/gcc.target/s390/vector/vlgv-zero-extend-1.c b/gcc/testsuite/gcc.target/s390/vector/vlgv-zero-extend-1.c
new file mode 100644
index 00000000000..11df6c1869a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vlgv-zero-extend-1.c
@@ -0,0 +1,71 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target s390_vx } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { scan-assembler-not {\tllg?[fhc]r\t} } } */
+
+typedef unsigned char __attribute__ ((vector_size (1))) V1QI;
+typedef unsigned char __attribute__ ((vector_size (2))) V2QI;
+typedef unsigned char __attribute__ ((vector_size (4))) V4QI;
+typedef unsigned char __attribute__ ((vector_size (8))) V8QI;
+typedef unsigned char __attribute__ ((vector_size (16))) V16QI;
+
+typedef unsigned short __attribute__ ((vector_size (2))) V1HI;
+typedef unsigned short __attribute__ ((vector_size (4))) V2HI;
+typedef unsigned short __attribute__ ((vector_size (8))) V4HI;
+typedef unsigned short __attribute__ ((vector_size (16))) V8HI;
+
+typedef unsigned int __attribute__ ((vector_size (4))) V1SI;
+typedef unsigned int __attribute__ ((vector_size (8))) V2SI;
+typedef unsigned int __attribute__ ((vector_size (16))) V4SI;
+
+unsigned short ushort;
+unsigned int uint;
+
+#define TEST(T, U, I) \
+  unsigned T test_ ## I ## _ ## U (U x) { return x[I]; } \
+  void test_ ## I ## _ ## U ## _ushort (U x) { ushort = x[I]; } \
+  void test_ ## I ## _ ## U ## _uint (U x) { uint = x[I]; }
+
+#define TEST1(T, U) \
+  TEST(T, U, 0)
+
+#define TEST2(T, U) \
+  TEST1 (T, U) \
+  TEST(T, U, 1)
+
+#define TEST4(T, U) \
+  TEST2 (T, U) \
+  TEST(T, U, 2) \
+  TEST(T, U, 3)
+
+#define TEST8(T, U) \
+  TEST4 (T, U) \
+  TEST(T, U, 4) \
+  TEST(T, U, 5) \
+  TEST(T, U, 6) \
+  TEST(T, U, 7)
+
+#define TEST16(T, U) \
+  TEST8 (T, U) \
+  TEST(T, U, 9) \
+  TEST(T, U, 10) \
+  TEST(T, U, 11) \
+  TEST(T, U, 12) \
+  TEST(T, U, 13) \
+  TEST(T, U, 14) \
+  TEST(T, U, 15)
+
+TEST1 (char, V1QI)
+TEST2 (char, V2QI)
+TEST4 (char, V4QI)
+TEST8 (char, V8QI)
+TEST16 (char, V16QI)
+
+TEST1 (short, V1HI)
+TEST2 (short, V2HI)
+TEST4 (short, V4HI)
+TEST8 (short, V8HI)
+
+TEST1 (int, V1SI)
+TEST2 (int, V2SI)
+TEST4 (int, V4SI)
-- 
2.49.0