Exploit the fact that the VLGV instruction zeroes all remaining bits of the
target GPR, i.e., it performs an implicit zero extend.

gcc/ChangeLog:

        * config/s390/vector.md (bhfgq): Add scalar modes.
        (*movdi<mode>_zero_extend_A): New insn.
        (*movsi<mode>_zero_extend_A): New insn.
        (VLGV_DI): New mode iterator.
        (*movdi<mode>_zero_extend_B): New insn.
        (VLGV_SI): New mode iterator.
        (*movsi<mode>_zero_extend_B): New insn.

gcc/testsuite/ChangeLog:

        * gcc.target/s390/vector/vlgv-zero-extend-1.c: New test.
---
 gcc/config/s390/vector.md                     | 60 ++++++++++++++--
 .../s390/vector/vlgv-zero-extend-1.c          | 71 +++++++++++++++++++
 2 files changed, 125 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vlgv-zero-extend-1.c

diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index c63360f8bcc..12bbeb64072 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -149,13 +149,13 @@
 
 ; The instruction suffix for integer instructions and instructions
 ; which do not care about whether it is floating point or integer.
-(define_mode_attr bhfgq[(V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b")
-                       (V1HI "h") (V2HI "h") (V4HI "h") (V8HI "h")
-                       (V1SI "f") (V2SI "f") (V4SI "f")
-                       (V1DI "g") (V2DI "g")
+(define_mode_attr bhfgq[(V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b") (QI "b")
+                       (V1HI "h") (V2HI "h") (V4HI "h") (V8HI "h") (HI "h")
+                       (V1SI "f") (V2SI "f") (V4SI "f") (SI "f")
+                       (V1DI "g") (V2DI "g") (DI "g")
                        (V1TI "q") (TI "q")
-                       (V1SF "f") (V2SF "f") (V4SF "f")
-                       (V1DF "g") (V2DF "g")
+                       (V1SF "f") (V2SF "f") (V4SF "f") (SF "f")
+                       (V1DF "g") (V2DF "g") (DF "g")
                        (V1TF "q") (TF "q")])
 
 ; This is for vmalhw. It gets an 'w' attached to avoid confusion with
@@ -501,6 +501,62 @@
                         SIL,SIL,RI,RI,RRE,RRE,RIL,RR,RXY,RXY,RIL")])
 
 
+; Instructions vlgvb, vlgvh, vlgvf zero all remaining bits of a GPR, i.e.,
+; an implicit zero extend is done.
+
+; Variant A: operand 1 is a scalar held in a vector register (constraint v);
+; it is extracted as element 0.
+(define_insn "*movdi<mode>_zero_extend_A"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+       (zero_extend:DI (match_operand:SINT 1 "register_operand" "v")))]
+  "TARGET_VX"
+  "vlgv<bhfgq>\t%0,%v1,0"
+  [(set_attr "op_type" "VRS")])
+
+(define_insn "*movsi<mode>_zero_extend_A"
+  [(set (match_operand:SI 0 "register_operand" "=d")
+       (zero_extend:SI (match_operand:HQI 1 "register_operand" "v")))]
+  "TARGET_VX"
+  "vlgv<bhfgq>\t%0,%v1,0"
+  [(set_attr "op_type" "VRS")])
+
+; Variant B: the source is a vector element selected by a constant index.
+
+; Vector modes whose elements are narrower than DImode.
+(define_mode_iterator VLGV_DI [V1QI V2QI V4QI V8QI V16QI
+                              V1HI V2HI V4HI V8HI
+                              V1SI V2SI V4SI])
+(define_insn "*movdi<mode>_zero_extend_B"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+       (zero_extend:DI (vec_select:<non_vec>
+                         (match_operand:VLGV_DI 1 "register_operand" "v")
+                         (parallel [(match_operand:SI 2 "const_int_operand" "n")]))))]
+  "TARGET_VX"
+{
+  /* Mask the index into range; the element count is a power of two.  */
+  operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
+  return "vlgv<bhfgq>\t%0,%v1,%Y2";
+}
+  [(set_attr "op_type" "VRS")
+   (set_attr "mnemonic" "vlgv<bhfgq>")])
+
+; Vector modes whose elements are narrower than SImode.
+(define_mode_iterator VLGV_SI [V1QI V2QI V4QI V8QI V16QI
+                              V1HI V2HI V4HI V8HI])
+(define_insn "*movsi<mode>_zero_extend_B"
+  [(set (match_operand:SI 0 "register_operand" "=d")
+       (zero_extend:SI (vec_select:<non_vec>
+                         (match_operand:VLGV_SI 1 "register_operand" "v")
+                         (parallel [(match_operand:SI 2 "const_int_operand" "n")]))))]
+  "TARGET_VX"
+{
+  /* Mask the index into range; the element count is a power of two.  */
+  operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
+  return "vlgv<bhfgq>\t%0,%v1,%Y2";
+}
+  [(set_attr "op_type" "VRS")
+   (set_attr "mnemonic" "vlgv<bhfgq>")])
+
 ; vec_load_lanes?
 
 ; vec_store_lanes?
diff --git a/gcc/testsuite/gcc.target/s390/vector/vlgv-zero-extend-1.c b/gcc/testsuite/gcc.target/s390/vector/vlgv-zero-extend-1.c
new file mode 100644
index 00000000000..11df6c1869a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vlgv-zero-extend-1.c
@@ -0,0 +1,78 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target s390_vx } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { scan-assembler-not {\tllg?[fhc]r\t} } } */
+
+/* VLGV zeros all remaining bits of the target GPR, so extracting a vector
+   element must not be accompanied by a separate zero extend (llcr, llhr,
+   llgcr, llghr, llgfr).  */
+
+typedef unsigned char __attribute__ ((vector_size (1))) V1QI;
+typedef unsigned char __attribute__ ((vector_size (2))) V2QI;
+typedef unsigned char __attribute__ ((vector_size (4))) V4QI;
+typedef unsigned char __attribute__ ((vector_size (8))) V8QI;
+typedef unsigned char __attribute__ ((vector_size (16))) V16QI;
+
+typedef unsigned short __attribute__ ((vector_size (2))) V1HI;
+typedef unsigned short __attribute__ ((vector_size (4))) V2HI;
+typedef unsigned short __attribute__ ((vector_size (8))) V4HI;
+typedef unsigned short __attribute__ ((vector_size (16))) V8HI;
+
+typedef unsigned int __attribute__ ((vector_size (4))) V1SI;
+typedef unsigned int __attribute__ ((vector_size (8))) V2SI;
+typedef unsigned int __attribute__ ((vector_size (16))) V4SI;
+
+unsigned short ushort;
+unsigned int uint;
+
+/* For element I of vector type U (element type unsigned T), define one
+   function returning the element and two storing it to ushort and uint.  */
+#define TEST(T, U, I) \
+  unsigned T test_ ## I ## _ ## U (U x) { return x[I]; } \
+  void       test_ ## I ## _ ## U ## _ushort (U x) { ushort = x[I]; } \
+  void       test_ ## I ## _ ## U ## _uint (U x) { uint = x[I]; }
+
+#define TEST1(T, U) \
+  TEST(T, U, 0)
+
+#define TEST2(T, U) \
+  TEST1 (T, U) \
+  TEST(T, U, 1)
+
+#define TEST4(T, U) \
+  TEST2 (T, U) \
+  TEST(T, U, 2) \
+  TEST(T, U, 3)
+
+#define TEST8(T, U) \
+  TEST4 (T, U) \
+  TEST(T, U, 4) \
+  TEST(T, U, 5) \
+  TEST(T, U, 6) \
+  TEST(T, U, 7)
+
+#define TEST16(T, U) \
+  TEST8 (T, U) \
+  TEST(T, U, 8) \
+  TEST(T, U, 9) \
+  TEST(T, U, 10) \
+  TEST(T, U, 11) \
+  TEST(T, U, 12) \
+  TEST(T, U, 13) \
+  TEST(T, U, 14) \
+  TEST(T, U, 15)
+
+TEST1 (char, V1QI)
+TEST2 (char, V2QI)
+TEST4 (char, V4QI)
+TEST8 (char, V8QI)
+TEST16 (char, V16QI)
+
+TEST1 (short, V1HI)
+TEST2 (short, V2HI)
+TEST4 (short, V4HI)
+TEST8 (short, V8HI)
+
+TEST1 (int, V1SI)
+TEST2 (int, V2SI)
+TEST4 (int, V4SI)
-- 
2.49.0

Reply via email to