This patch is a follow-up to patch #5.  It adds support for using the Altivec
VSPLTB/VSPLTH instructions when creating a vector char or vector short in which
every element has the same (but non-constant) value, on 64-bit systems with
direct move.

This patch has been part of the larger set of vector initialization patches
that I've been testing for a while.  Most of that set has now been submitted,
either in patch #5 or in this patch (#6).

There are a few patches remaining that cause a 4% performance degradation in
the zeusmp benchmark (everything else with the larger set of patches is about
the same performance).  I built and ran zeusmp, and these particular patches do
not cause the degradation.  I will submit a full run over the weekend just to
be sure.

I tested these patches on a big endian Power8 system and a little endian Power8
system, and previous versions have run on a big endian Power7 system.  There
were no regressions caused by these patches.  Can I install these patches in
the GCC 7 trunk after the patches in patch #5 are installed?

[gcc]
2016-08-19  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * config/rs6000/rs6000.c (rs6000_expand_vector_init): Add support
        for using VSPLTH/VSPLTB to initialize vector short and vector char
        vectors with all of the same element.

        * config/rs6000/vsx.md (VSX_SPLAT_I): New mode iterators and
        attributes to initialize V8HImode and V16QImode vectors with the
        same element.
        (VSX_SPLAT_COUNT): Likewise.
        (VSX_SPLAT_SUFFIX): Likewise.
        (vsx_vsplt<VSX_SPLAT_SUFFIX>_di): New insns to support
        initializing V8HImode and V16QImode vectors with the same
        element.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 239627)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -6827,6 +6827,32 @@ rs6000_expand_vector_init (rtx target, r
       return;
     }
 
+  /* Special case initializing vector short/char that are splats if we are on
+     64-bit systems with direct move.  */
+  if (all_same && TARGET_DIRECT_MOVE_64BIT
+      && (mode == V16QImode || mode == V8HImode))
+    {
+      rtx op0 = XVECEXP (vals, 0, 0);
+      rtx di_tmp = gen_reg_rtx (DImode);
+
+      if (!REG_P (op0))
+       op0 = force_reg (GET_MODE_INNER (mode), op0);
+
+      if (mode == V16QImode)
+       {
+         emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
+         emit_insn (gen_vsx_vspltb_di (target, di_tmp));
+         return;
+       }
+
+      if (mode == V8HImode)
+       {
+         emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
+         emit_insn (gen_vsx_vsplth_di (target, di_tmp));
+         return;
+       }
+    }
+
   /* Store value to stack temp.  Load vector element.  Splat.  However, splat
      of 64-bit items is not supported on Altivec.  */
   if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md    (revision 239588)
+++ gcc/config/rs6000/vsx.md    (working copy)
@@ -281,6 +281,16 @@ (define_mode_attr VSX_EX [(V16QI "v")
                          (V8HI  "v")
                          (V4SI  "wa")])
 
+;; Iterator for the char/short vector types that are splatted from an integer
+(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
+
+;; Mode attribute giving the element number the splat instruction replicates:
+;; the low-order byte/halfword of the 64-bit integer slot
+(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])
+
+;; Mode attribute to give the suffix for the splat instruction
+(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])
+
 ;; Constants for creating unspecs
 (define_c_enum "unspec"
   [UNSPEC_VSX_CONCAT
@@ -2766,6 +2776,16 @@ (define_insn "vsx_xxspltw_<mode>_direct"
   "xxspltw %x0,%x1,%2"
   [(set_attr "type" "vecperm")])
 
+;; V16QI/V8HI splat support on ISA 2.07
+(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di"
+  [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v")
+       (vec_duplicate:VSX_SPLAT_I
+        (truncate:<VS_scalar>
+         (match_operand:DI 1 "altivec_register_operand" "v"))))]
+  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
+  "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>"
+  [(set_attr "type" "vecperm")])
+
 ;; V2DF/V2DI splat for use by vec_splat builtin
 (define_insn "vsx_xxspltd_<mode>"
   [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")

Reply via email to