390 Add vector scalar instruction support.

Andreas Krebbel Mon, 11 May 2015 06:26:51 -0700

With this patch GCC makes use of the vector instruction which are
available in single element mode.  By using these instructions scalar
double operations can use 32 registers.


gcc/
        * config/s390/s390-modes.def: Add new modes CCVEQ, CCVFH, and
        CCVFHE.
        * config/s390/s390.c (s390_match_ccmode_set): Handle new modes.
        (s390_select_ccmode): Likewise.
        (s390_canonicalize_comparison): Swap operands if necessary.
        (s390_expand_vec_compare_scalar): Expand DFmode compare using
        single element vector instructions.
        (s390_emit_compare): Call s390_expand_vec_compare_scalar.
        (s390_branch_condition_mask): Generate CC masks for the new modes.
        * config/s390/s390.md (v0, vf, vd): New mode attributes.
        (VFCMP, asm_fcmp, insn_cmp): New mode iterator and attributes.
        (*vec_cmp<insn_cmp>df_cconly, *fixuns_truncdfdi2_z13)
        (*fix_trunc<BFP:mode><GPR:mode>2_bfp, *floatunsdidf2_z13)
        (*floatuns<GPR:mode><FP:mode>2, *extendsfdf2_z13)
        (*extend<DSF:mode><BFP:mode>2): New insn definition.
        (fix_trunc<BFP:mode><GPR:mode>2_bfp, loatuns<GPR:mode><FP:mode>2)
        (extend<DSF:mode><BFP:mode>2): Turn into expander.
        (floatdi<mode>2, truncdfsf2, add<mode>3, sub<mode>3, mul<mode>3)
        (div<mode>3, *neg<mode>2, *abs<mode>2, *negabs<mode>2)
        (sqrt<mode>2): Add vector instruction.

gcc/testsuite/
        * gcc.target/s390/vector/vec-scalar-cmp-1.c: New test.
---
 gcc/config/s390/s390-modes.def                     |   10 +
 gcc/config/s390/s390.c                             |  131 ++++++++-
 gcc/config/s390/s390.md                            |  304 ++++++++++++++------
 .../gcc.target/s390/vector/vec-scalar-cmp-1.c      |   49 ++++
 4 files changed, 403 insertions(+), 91 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c

diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def
index a40559e..26c0a81 100644
--- a/gcc/config/s390/s390-modes.def
+++ b/gcc/config/s390/s390-modes.def
@@ -84,7 +84,12 @@ Requested mode            -> Destination CC register mode
 CCS, CCU, CCT, CCSR, CCUR -> CCZ
 CCA                       -> CCAP, CCAN
 
+Vector comparison modes
 
+CCVEQ            EQ      -            -           NE         (VCEQ)
+
+CCVFH    GT      -            -           UNLE       (VFCH)
+CCVFHE   GE      -            -           UNLT       (VFCHE)
 *** Comments ***
 
 CCAP, CCAN
@@ -182,6 +187,11 @@ CC_MODE (CCT2);
 CC_MODE (CCT3);
 CC_MODE (CCRAW);
 
+CC_MODE (CCVEQ);
+CC_MODE (CCVFH);
+CC_MODE (CCVFHE);
+
+
 /* Vector modes.  */
 
 VECTOR_MODES (INT, 2);        /*                 V2QI */
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 11fed14..848cc0c 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -681,6 +681,9 @@ s390_match_ccmode_set (rtx set, machine_mode req_mode)
     case CCT1mode:
     case CCT2mode:
     case CCT3mode:
+    case CCVEQmode:
+    case CCVFHmode:
+    case CCVFHEmode:
       if (req_mode != set_mode)
         return 0;
       break;
@@ -781,6 +784,29 @@ s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
 machine_mode
 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
 {
+  if (TARGET_VX
+      && register_operand (op0, DFmode)
+      && register_operand (op1, DFmode))
+    {
+      /* LT, LE, UNGT, UNGE require swapping OP0 and OP1.  Either
+        s390_emit_compare or s390_canonicalize_comparison will take
+        care of it.  */
+      switch (code)
+       {
+       case EQ:
+       case NE:
+         return CCVEQmode;
+       case GT:
+       case UNLE:
+         return CCVFHmode;
+       case GE:
+       case UNLT:
+         return CCVFHEmode;
+       default:
+         ;
+       }
+    }
+
   switch (code)
     {
       case EQ:
@@ -1058,8 +1084,74 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx 
*op1,
       rtx tem = *op0; *op0 = *op1; *op1 = tem;
       *code = (int)swap_condition ((enum rtx_code)*code);
     }
+
+  /* Using the scalar variants of vector instructions for 64 bit FP
+     comparisons might require swapping the operands.  */
+  if (TARGET_VX
+      && register_operand (*op0, DFmode)
+      && register_operand (*op1, DFmode)
+      && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
+    {
+      rtx tmp;
+
+      switch (*code)
+       {
+       case LT:   *code = GT; break;
+       case LE:   *code = GE; break;
+       case UNGT: *code = UNLE; break;
+       case UNGE: *code = UNLT; break;
+       default: ;
+       }
+      tmp = *op0; *op0 = *op1; *op1 = tmp;
+    }
 }
 
+/* Helper function for s390_emit_compare.  If possible emit a 64 bit
+   FP compare using the single element variant of vector instructions.
+   Replace CODE with the comparison code to be used in the CC reg
+   compare and return the condition code register RTX in CC.  */
+
+static bool
+s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
+                               rtx *cc)
+{
+  machine_mode cmp_mode;
+  bool swap_p = false;
+
+  switch (*code)
+    {
+    case EQ:   cmp_mode = CCVEQmode;  break;
+    case NE:   cmp_mode = CCVEQmode;  break;
+    case GT:   cmp_mode = CCVFHmode;  break;
+    case GE:   cmp_mode = CCVFHEmode; break;
+    case UNLE: cmp_mode = CCVFHmode;  break;
+    case UNLT: cmp_mode = CCVFHEmode; break;
+    case LT:   cmp_mode = CCVFHmode;  *code = GT;   swap_p = true; break;
+    case LE:   cmp_mode = CCVFHEmode; *code = GE;   swap_p = true; break;
+    case UNGE: cmp_mode = CCVFHmode;  *code = UNLE; swap_p = true; break;
+    case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
+    default: return false;
+    }
+
+  if (swap_p)
+    {
+      rtx tmp = cmp2;
+      cmp2 = cmp1;
+      cmp1 = tmp;
+    }
+  *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
+  emit_insn (gen_rtx_PARALLEL (VOIDmode,
+              gen_rtvec (2,
+                         gen_rtx_SET (VOIDmode,
+                                      *cc,
+                                      gen_rtx_COMPARE (cmp_mode, cmp1,
+                                                       cmp2)),
+                         gen_rtx_CLOBBER (VOIDmode,
+                                          gen_rtx_SCRATCH (V2DImode)))));
+  return true;
+}
+
+
 /* Emit a compare instruction suitable to implement the comparison
    OP0 CODE OP1.  Return the correct condition RTL to be placed in
    the IF_THEN_ELSE of the conditional branch testing the result.  */
@@ -1070,10 +1162,18 @@ s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
   machine_mode mode = s390_select_ccmode (code, op0, op1);
   rtx cc;
 
-  /* Do not output a redundant compare instruction if a compare_and_swap
-     pattern already computed the result and the machine modes are compatible. 
 */
-  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
+  if (TARGET_VX
+      && register_operand (op0, DFmode)
+      && register_operand (op1, DFmode)
+      && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
+    {
+      /* Work has been done by s390_expand_vec_compare_scalar already.  */
+    }
+  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
     {
+      /* Do not output a redundant compare instruction if a
+        compare_and_swap pattern already computed the result and the
+        machine modes are compatible.  */
       gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
                  == GET_MODE (op0));
       cc = op0;
@@ -1308,6 +1408,31 @@ s390_branch_condition_mask (rtx code)
         }
       break;
 
+      /* Vector comparison modes.  */
+
+    case CCVEQmode:
+      switch (GET_CODE (code))
+       {
+       case EQ:        return CC0;
+       case NE:        return CC3;
+       default:        return -1;
+       }
+      /* FP vector compare modes.  */
+
+    case CCVFHmode:
+      switch (GET_CODE (code))
+       {
+       case GT:        return CC0;
+       case UNLE:      return CC3;
+       default:        return -1;
+       }
+    case CCVFHEmode:
+      switch (GET_CODE (code))
+       {
+       case GE:        return CC0;
+       case UNLT:      return CC3;
+       default:        return -1;
+       }
     case CCRAWmode:
       switch (GET_CODE (code))
        {
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 8680770..40be3be 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -524,6 +524,14 @@
 ;; first and the second operand match for bfp modes.
 (define_mode_attr f0 [(TF "0") (DF "0") (SF "0") (TD "f") (DD "f") (DD "f")])
 
+;; This attribute is used to merge the scalar vector instructions into
+;; the FP patterns.  For non-supported modes (all but DF) it expands
+;; to constraints which are supposed to be matched by an earlier
+;; variant.
+(define_mode_attr v0      [(TF "0") (DF "v") (SF "0") (TD "0") (DD "0") (DD 
"0") (TI "0") (DI "v") (SI "0")])
+(define_mode_attr vf      [(TF "f") (DF "v") (SF "f") (TD "f") (DD "f") (DD 
"f") (TI "f") (DI "v") (SI "f")])
+(define_mode_attr vd      [(TF "d") (DF "v") (SF "d") (TD "d") (DD "d") (DD 
"d") (TI "d") (DI "v") (SI "d")])
+
 ;; This attribute is used in the operand list of the instruction to have an
 ;; additional operand for the dfp instructions.
 (define_mode_attr op1 [(TF "") (DF "") (SF "")
@@ -635,6 +643,17 @@
 ;; Allow return and simple_return to be defined from a single template.
 (define_code_iterator ANY_RETURN [return simple_return])
 
+
+
+; Condition code modes generated by vector fp comparisons.  These will
+; be used also in single element mode.
+(define_mode_iterator VFCMP [CCVEQ CCVFH CCVFHE])
+; Used with VFCMP to expand part of the mnemonic
+; For fp we have a mismatch: eq in the insn name - e in asm
+(define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")])
+(define_mode_attr insn_cmp [(CCVEQ "eq") (CCVFH "h") (CCVFHE "he")])
+
+
 (include "vector.md")
 
 ;;
@@ -1144,6 +1163,15 @@
    [(set_attr "op_type" "RRE,RXE")
     (set_attr "type"  "fsimp<mode>")])
 
+; wfcedbs, wfchdbs, wfchedbs
+(define_insn "*vec_cmp<insn_cmp>df_cconly"
+  [(set (reg:VFCMP CC_REGNUM)
+       (compare:VFCMP (match_operand:DF 0 "register_operand" "v")
+                      (match_operand:DF 1 "register_operand" "v")))
+   (clobber (match_scratch:V2DI 2 "=v"))]
+  "TARGET_Z13 && TARGET_HARD_FLOAT"
+  "wfc<asm_fcmp>dbs\t%v2,%v0,%v1"
+  [(set_attr "op_type" "VRR")])
 
 ; Compare and Branch instructions
 
@@ -4361,14 +4389,27 @@
 
 ; fixuns_trunc(tf|df|sf|td|dd)(di|si)2 instruction patterns.
 
+(define_insn "*fixuns_truncdfdi2_z13"
+  [(set (match_operand:DI                  0 "register_operand" "=d,v")
+       (unsigned_fix:DI (match_operand:DF 1 "register_operand"  "f,v")))
+   (unspec:DI [(match_operand:DI           2 "immediate_operand" "K,K")] 
UNSPEC_ROUND)
+   (clobber (reg:CC CC_REGNUM))]
+   "TARGET_Z13 && TARGET_HARD_FLOAT"
+   "@
+    clgdbr\t%0,%h2,%1,0
+    wclgdb\t%v0,%v1,0,%h2"
+   [(set_attr "op_type" "RRF,VRR")
+    (set_attr "type"    "ftoi")])
+
 ; clfebr, clfdbr, clfxbr, clgebr, clgdbr, clgxbr
 ;         clfdtr, clfxtr,         clgdtr, clgxtr
 (define_insn "*fixuns_trunc<FP:mode><GPR:mode>2_z196"
-  [(set (match_operand:GPR 0 "register_operand" "=r")
-       (unsigned_fix:GPR (match_operand:FP 1 "register_operand" "f")))
-   (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND)
+  [(set (match_operand:GPR                  0 "register_operand" "=d")
+       (unsigned_fix:GPR (match_operand:FP 1 "register_operand"  "f")))
+   (unspec:GPR [(match_operand:GPR          2 "immediate_operand" "K")] 
UNSPEC_ROUND)
    (clobber (reg:CC CC_REGNUM))]
-   "TARGET_Z196"
+   "TARGET_Z196 && TARGET_HARD_FLOAT
+    && (!TARGET_Z13 || <GPR:MODE>mode != DImode || <FP:MODE>mode != DFmode)"
    "cl<GPR:gf><FP:xde><FP:bt>r\t%0,%h2,%1,0"
    [(set_attr "op_type" "RRF")
     (set_attr "type"    "ftoi")])
@@ -4383,18 +4424,37 @@
   DONE;
 })
 
+(define_insn "*fix_truncdfdi2_bfp_z13"
+  [(set (match_operand:DI         0 "register_operand" "=d,v")
+        (fix:DI (match_operand:DF 1 "register_operand"  "f,v")))
+   (unspec:DI [(match_operand:DI  2 "immediate_operand" "K,K")] UNSPEC_ROUND)
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_Z13 && TARGET_HARD_FLOAT"
+  "@
+   cgdbr\t%0,%h2,%1
+   wcgdb\t%v0,%v1,0,%h2"
+  [(set_attr "op_type" "RRE,VRR")
+   (set_attr "type"    "ftoi")])
+
 ; cgxbr, cgdbr, cgebr, cfxbr, cfdbr, cfebr
-(define_insn "fix_trunc<BFP:mode><GPR:mode>2_bfp"
-  [(set (match_operand:GPR 0 "register_operand" "=d")
-        (fix:GPR (match_operand:BFP 1 "register_operand" "f")))
-   (unspec:GPR [(match_operand:GPR 2 "immediate_operand" "K")] UNSPEC_ROUND)
+(define_insn "*fix_trunc<BFP:mode><GPR:mode>2_bfp"
+  [(set (match_operand:GPR          0 "register_operand" "=d")
+        (fix:GPR (match_operand:BFP 1 "register_operand"  "f")))
+   (unspec:GPR [(match_operand:GPR  2 "immediate_operand" "K")] UNSPEC_ROUND)
    (clobber (reg:CC CC_REGNUM))]
-  "TARGET_HARD_FLOAT"
+  "TARGET_HARD_FLOAT
+    && (!TARGET_VX || <GPR:MODE>mode != DImode || <BFP:MODE>mode != DFmode)"
   "c<GPR:gf><BFP:xde>br\t%0,%h2,%1"
   [(set_attr "op_type" "RRE")
    (set_attr "type"    "ftoi")])
 
-
+(define_expand "fix_trunc<BFP:mode><GPR:mode>2_bfp"
+  [(parallel
+    [(set (match_operand:GPR          0 "register_operand" "=d")
+         (fix:GPR (match_operand:BFP 1 "register_operand"  "f")))
+     (unspec:GPR [(match_operand:GPR  2 "immediate_operand" "K")] UNSPEC_ROUND)
+     (clobber (reg:CC CC_REGNUM))])]
+  "TARGET_HARD_FLOAT")
 ;
 ; fix_trunc(td|dd)di2 instruction pattern(s).
 ;
@@ -4441,12 +4501,15 @@
 
 ; cxgbr, cdgbr, cegbr, cxgtr, cdgtr
 (define_insn "floatdi<mode>2"
-  [(set (match_operand:FP 0 "register_operand" "=f")
-        (float:FP (match_operand:DI 1 "register_operand" "d")))]
+  [(set (match_operand:FP           0 "register_operand" "=f,<vf>")
+        (float:FP (match_operand:DI 1 "register_operand"  "d,<vd>")))]
   "TARGET_ZARCH && TARGET_HARD_FLOAT"
-  "c<xde>g<bt>r\t%0,%1"
-  [(set_attr "op_type" "RRE")
-   (set_attr "type"    "itof<mode>" )])
+  "@
+   c<xde>g<bt>r\t%0,%1
+   wcdgb\t%v0,%v1,0,0"
+  [(set_attr "op_type"      "RRE,VRR")
+   (set_attr "type"         "itof<mode>" )
+   (set_attr "cpu_facility" "*,vec")])
 
 ; cxfbr, cdfbr, cefbr
 (define_insn "floatsi<mode>2"
@@ -4470,27 +4533,47 @@
 ; floatuns(si|di)(tf|df|sf|td|dd)2 instruction pattern(s).
 ;
 
+(define_insn "*floatunsdidf2_z13"
+  [(set (match_operand:DF                    0 "register_operand" "=f,v")
+        (unsigned_float:DF (match_operand:DI 1 "register_operand"  "d,v")))]
+  "TARGET_Z13 && TARGET_HARD_FLOAT"
+  "@
+   cdlgbr\t%0,0,%1,0
+   wcdlgb\t%v0,%v1,0,0"
+  [(set_attr "op_type" "RRE,VRR")
+   (set_attr "type"    "itofdf")])
+
 ; cxlgbr, cdlgbr, celgbr, cxlgtr, cdlgtr
 ; cxlfbr, cdlfbr, celfbr, cxlftr, cdlftr
-(define_insn "floatuns<GPR:mode><FP:mode>2"
-  [(set (match_operand:FP 0 "register_operand" "=f")
-        (unsigned_float:FP (match_operand:GPR 1 "register_operand" "d")))]
-  "TARGET_Z196 && TARGET_HARD_FLOAT"
+(define_insn "*floatuns<GPR:mode><FP:mode>2"
+  [(set (match_operand:FP                     0 "register_operand" "=f")
+        (unsigned_float:FP (match_operand:GPR 1 "register_operand"  "d")))]
+  "TARGET_Z196 && TARGET_HARD_FLOAT
+   && (!TARGET_VX || <FP:MODE>mode != DFmode || <GPR:MODE>mode != DImode)"
   "c<FP:xde>l<GPR:gf><FP:bt>r\t%0,0,%1,0"
   [(set_attr "op_type" "RRE")
-   (set_attr "type"    "itof<FP:mode>" )])
+   (set_attr "type"    "itof<FP:mode>")])
+
+(define_expand "floatuns<GPR:mode><FP:mode>2"
+  [(set (match_operand:FP                     0 "register_operand" "")
+        (unsigned_float:FP (match_operand:GPR 1 "register_operand" "")))]
+  "TARGET_Z196 && TARGET_HARD_FLOAT")
 
 ;
 ; truncdfsf2 instruction pattern(s).
 ;
 
 (define_insn "truncdfsf2"
-  [(set (match_operand:SF 0 "register_operand" "=f")
-        (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))]
+  [(set (match_operand:SF                    0 "register_operand" "=f,v")
+        (float_truncate:SF (match_operand:DF 1 "register_operand"  "f,v")))]
   "TARGET_HARD_FLOAT"
-  "ledbr\t%0,%1"
-  [(set_attr "op_type"  "RRE")
-   (set_attr "type"   "ftruncdf")])
+  "@
+   ledbr\t%0,%1
+   wledb\t%v0,%v1,0,0" ; IEEE inexact exception not suppressed
+                       ; According to BFP rounding mode
+  [(set_attr "op_type"      "RRE,VRR")
+   (set_attr "type"         "ftruncdf")
+   (set_attr "cpu_facility" "*,vec")])
 
 ;
 ; trunctf(df|sf)2 instruction pattern(s).
@@ -4543,17 +4626,35 @@
 ; extend(sf|df)(df|tf)2 instruction pattern(s).
 ;
 
+(define_insn "*extendsfdf2_z13"
+  [(set (match_operand:DF                  0 "register_operand"     "=f,f,v")
+        (float_extend:DF (match_operand:SF 1 "nonimmediate_operand"  
"f,R,v")))]
+  "TARGET_Z13 && TARGET_HARD_FLOAT"
+  "@
+   ldebr\t%0,%1
+   ldeb\t%0,%1
+   wldeb\t%v0,%v1"
+  [(set_attr "op_type" "RRE,RXE,VRR")
+   (set_attr "type"    "fsimpdf, floaddf,fsimpdf")])
+
 ; ldebr, ldeb, lxdbr, lxdb, lxebr, lxeb
-(define_insn "extend<DSF:mode><BFP:mode>2"
-  [(set (match_operand:BFP 0 "register_operand" "=f,f")
+(define_insn "*extend<DSF:mode><BFP:mode>2"
+  [(set (match_operand:BFP                   0 "register_operand"     "=f,f")
         (float_extend:BFP (match_operand:DSF 1 "nonimmediate_operand"  
"f,R")))]
   "TARGET_HARD_FLOAT
-   && GET_MODE_SIZE (<BFP:MODE>mode) > GET_MODE_SIZE (<DSF:MODE>mode)"
+   && GET_MODE_SIZE (<BFP:MODE>mode) > GET_MODE_SIZE (<DSF:MODE>mode)
+   && (!TARGET_VX || <BFP:MODE>mode != DFmode || <DSF:MODE>mode != SFmode)"
   "@
    l<BFP:xde><DSF:xde>br\t%0,%1
    l<BFP:xde><DSF:xde>b\t%0,%1"
-  [(set_attr "op_type"  "RRE,RXE")
-   (set_attr "type"   "fsimp<BFP:mode>, fload<BFP:mode>")])
+  [(set_attr "op_type" "RRE,RXE")
+   (set_attr "type"    "fsimp<BFP:mode>, fload<BFP:mode>")])
+
+(define_expand "extend<DSF:mode><BFP:mode>2"
+  [(set (match_operand:BFP                   0 "register_operand"     "")
+        (float_extend:BFP (match_operand:DSF 1 "nonimmediate_operand" "")))]
+  "TARGET_HARD_FLOAT
+   && GET_MODE_SIZE (<BFP:MODE>mode) > GET_MODE_SIZE (<DSF:MODE>mode)")
 
 ;
 ; extendddtd2 and extendsddd2 instruction pattern(s).
@@ -5158,17 +5259,20 @@
 ;
 
 ; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr
+; FIXME: wfadb does not clobber cc
 (define_insn "add<mode>3"
-  [(set (match_operand:FP 0 "register_operand"              "=f,   f")
-        (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,0")
-                (match_operand:FP 2 "general_operand"      " f,<Rf>")))
+  [(set (match_operand:FP 0 "register_operand"                 "=f,   f,<vf>")
+        (plus:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,   0,<v0>")
+                (match_operand:FP 2 "general_operand"          "f,<Rf>,<vf>")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_HARD_FLOAT"
   "@
    a<xde><bt>r\t%0,<op1>%2
-   a<xde>b\t%0,%2"
-  [(set_attr "op_type"  "<RRer>,RXE")
-   (set_attr "type"     "fsimp<mode>")])
+   a<xde>b\t%0,%2
+   wfadb\t%v0,%v1,%v2"
+  [(set_attr "op_type"      "<RRer>,RXE,VRR")
+   (set_attr "type"         "fsimp<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])
 
 ; axbr, adbr, aebr, axb, adb, aeb, adtr, axtr
 (define_insn "*add<mode>3_cc"
@@ -5582,16 +5686,18 @@
 
 ; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr
 (define_insn "sub<mode>3"
-  [(set (match_operand:FP 0 "register_operand"            "=f,  f")
-        (minus:FP (match_operand:FP 1 "register_operand" "<f0>,0")
-                  (match_operand:FP 2 "general_operand"  "f,<Rf>")))
+  [(set (match_operand:FP           0 "register_operand"   "=f,   f,<vf>")
+        (minus:FP (match_operand:FP 1 "register_operand" "<f0>,   0,<v0>")
+                  (match_operand:FP 2 "general_operand"     "f,<Rf>,<vf>")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_HARD_FLOAT"
   "@
    s<xde><bt>r\t%0,<op1>%2
-   s<xde>b\t%0,%2"
-  [(set_attr "op_type"  "<RRer>,RXE")
-   (set_attr "type"     "fsimp<mode>")])
+   s<xde>b\t%0,%2
+   wfsdb\t%v0,%v1,%v2"
+  [(set_attr "op_type"      "<RRer>,RXE,VRR")
+   (set_attr "type"         "fsimp<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])
 
 ; sxbr, sdbr, sebr, sdb, seb, sxtr, sdtr
 (define_insn "*sub<mode>3_cc"
@@ -5997,41 +6103,47 @@
 
 ; mxbr, mdbr, meebr, mxb, mxb, meeb, mdtr, mxtr
 (define_insn "mul<mode>3"
-  [(set (match_operand:FP 0 "register_operand"              "=f,f")
-        (mult:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,0")
-                 (match_operand:FP 2 "general_operand"      "f,<Rf>")))]
+  [(set (match_operand:FP          0 "register_operand"        "=f,   f,<vf>")
+        (mult:FP (match_operand:FP 1 "nonimmediate_operand" "%<f0>,   0,<v0>")
+                 (match_operand:FP 2 "general_operand"          
"f,<Rf>,<vf>")))]
   "TARGET_HARD_FLOAT"
   "@
    m<xdee><bt>r\t%0,<op1>%2
-   m<xdee>b\t%0,%2"
-  [(set_attr "op_type"  "<RRer>,RXE")
-   (set_attr "type"     "fmul<mode>")])
+   m<xdee>b\t%0,%2
+   wfmdb\t%v0,%v1,%v2"
+  [(set_attr "op_type"      "<RRer>,RXE,VRR")
+   (set_attr "type"         "fmul<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])
 
 ; madbr, maebr, maxb, madb, maeb
 (define_insn "fma<mode>4"
-  [(set (match_operand:DSF 0 "register_operand" "=f,f")
-       (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f")
-                (match_operand:DSF 2 "nonimmediate_operand" "f,R")
-                (match_operand:DSF 3 "register_operand" "0,0")))]
+  [(set (match_operand:DSF          0 "register_operand"     "=f,f,<vf>")
+       (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f,<vf>")
+                (match_operand:DSF 2 "nonimmediate_operand"  "f,R,<vf>")
+                (match_operand:DSF 3 "register_operand"      "0,0,<v0>")))]
   "TARGET_HARD_FLOAT"
   "@
    ma<xde>br\t%0,%1,%2
-   ma<xde>b\t%0,%1,%2"
-  [(set_attr "op_type"  "RRE,RXE")
-   (set_attr "type"     "fmadd<mode>")])
+   ma<xde>b\t%0,%1,%2
+   wfmadb\t%v0,%v1,%v2,%v3"
+  [(set_attr "op_type"      "RRE,RXE,VRR")
+   (set_attr "type"         "fmadd<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])
 
 ; msxbr, msdbr, msebr, msxb, msdb, mseb
 (define_insn "fms<mode>4"
-  [(set (match_operand:DSF 0 "register_operand" "=f,f")
-       (fma:DSF (match_operand:DSF 1 "nonimmediate_operand" "%f,f")
-                (match_operand:DSF 2 "nonimmediate_operand" "f,R")
-                (neg:DSF (match_operand:DSF 3 "register_operand" "0,0"))))]
+  [(set (match_operand:DSF                   0 "register_operand"     
"=f,f,<vf>")
+       (fma:DSF (match_operand:DSF          1 "nonimmediate_operand" 
"%f,f,<vf>")
+                (match_operand:DSF          2 "nonimmediate_operand"  
"f,R,<vf>")
+                (neg:DSF (match_operand:DSF 3 "register_operand"      
"0,0,<v0>"))))]
   "TARGET_HARD_FLOAT"
   "@
    ms<xde>br\t%0,%1,%2
-   ms<xde>b\t%0,%1,%2"
-  [(set_attr "op_type"  "RRE,RXE")
-   (set_attr "type"     "fmadd<mode>")])
+   ms<xde>b\t%0,%1,%2
+   wfmsdb\t%v0,%v1,%v2,%v3"
+  [(set_attr "op_type"      "RRE,RXE,VRR")
+   (set_attr "type"         "fmadd<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])
 
 ;;
 ;;- Divide and modulo instructions.
@@ -6457,15 +6569,17 @@
 
 ; dxbr, ddbr, debr, dxb, ddb, deb, ddtr, dxtr
 (define_insn "div<mode>3"
-  [(set (match_operand:FP 0 "register_operand"          "=f,f")
-        (div:FP (match_operand:FP 1 "register_operand" "<f0>,0")
-                 (match_operand:FP 2 "general_operand"  "f,<Rf>")))]
+  [(set (match_operand:FP         0 "register_operand"   "=f,   f,<vf>")
+        (div:FP (match_operand:FP 1 "register_operand" "<f0>,   0,<v0>")
+               (match_operand:FP 2 "general_operand"     "f,<Rf>,<vf>")))]
   "TARGET_HARD_FLOAT"
   "@
    d<xde><bt>r\t%0,<op1>%2
-   d<xde>b\t%0,%2"
-  [(set_attr "op_type"  "<RRer>,RXE")
-   (set_attr "type"     "fdiv<mode>")])
+   d<xde>b\t%0,%2
+   wfddb\t%v0,%v1,%v2"
+  [(set_attr "op_type"      "<RRer>,RXE,VRR")
+   (set_attr "type"         "fdiv<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])
 
 
 ;;
@@ -7674,14 +7788,18 @@
    (set_attr "type"     "fsimp<mode>")])
 
 ; lcxbr, lcdbr, lcebr
+; FIXME: wflcdb does not clobber cc
 (define_insn "*neg<mode>2"
-  [(set (match_operand:BFP 0 "register_operand" "=f")
-        (neg:BFP (match_operand:BFP 1 "register_operand" "f")))
+  [(set (match_operand:BFP          0 "register_operand" "=f,<vf>")
+        (neg:BFP (match_operand:BFP 1 "register_operand"  "f,<vf>")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_HARD_FLOAT"
-  "lc<xde>br\t%0,%1"
-  [(set_attr "op_type"  "RRE")
-   (set_attr "type"     "fsimp<mode>")])
+  "@
+   lc<xde>br\t%0,%1
+   wflcdb\t%0,%1"
+  [(set_attr "op_type"      "RRE,VRR")
+   (set_attr "cpu_facility" "*,vec")
+   (set_attr "type"         "fsimp<mode>,*")])
 
 
 ;;
@@ -7792,14 +7910,18 @@
    (set_attr "type"     "fsimp<mode>")])
 
 ; lpxbr, lpdbr, lpebr
+; FIXME: wflpdb does not clobber cc
 (define_insn "*abs<mode>2"
-  [(set (match_operand:BFP 0 "register_operand" "=f")
-        (abs:BFP (match_operand:BFP 1 "register_operand" "f")))
+  [(set (match_operand:BFP          0 "register_operand" "=f,<vf>")
+        (abs:BFP (match_operand:BFP 1 "register_operand"  "f,<vf>")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_HARD_FLOAT"
-  "lp<xde>br\t%0,%1"
-  [(set_attr "op_type"  "RRE")
-   (set_attr "type"     "fsimp<mode>")])
+  "@
+    lp<xde>br\t%0,%1
+    wflpdb\t%0,%1"
+  [(set_attr "op_type"      "RRE,VRR")
+   (set_attr "cpu_facility" "*,vec")
+   (set_attr "type"         "fsimp<mode>,*")])
 
 
 ;;
@@ -7903,14 +8025,18 @@
    (set_attr "type"     "fsimp<mode>")])
 
 ; lnxbr, lndbr, lnebr
+; FIXME: wflndb does not clobber cc
 (define_insn "*negabs<mode>2"
-  [(set (match_operand:BFP 0 "register_operand" "=f")
-        (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand" "f"))))
+  [(set (match_operand:BFP                   0 "register_operand" "=f,<vf>")
+        (neg:BFP (abs:BFP (match_operand:BFP 1 "register_operand"  "f,<vf>"))))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_HARD_FLOAT"
-  "ln<xde>br\t%0,%1"
-  [(set_attr "op_type"  "RRE")
-   (set_attr "type"     "fsimp<mode>")])
+  "@
+   ln<xde>br\t%0,%1
+   wflndb\t%0,%1"
+  [(set_attr "op_type"      "RRE,VRR")
+   (set_attr "cpu_facility" "*,vec")
+   (set_attr "type"         "fsimp<mode>,*")])
 
 ;;
 ;;- Square root instructions.
@@ -7922,14 +8048,16 @@
 
 ; sqxbr, sqdbr, sqebr, sqdb, sqeb
 (define_insn "sqrt<mode>2"
-  [(set (match_operand:BFP 0 "register_operand" "=f,f")
-       (sqrt:BFP (match_operand:BFP 1 "general_operand" "f,<Rf>")))]
+  [(set (match_operand:BFP           0 "register_operand" "=f,   f,<vf>")
+       (sqrt:BFP (match_operand:BFP 1 "general_operand"   "f,<Rf>,<vf>")))]
   "TARGET_HARD_FLOAT"
   "@
    sq<xde>br\t%0,%1
-   sq<xde>b\t%0,%1"
-  [(set_attr "op_type" "RRE,RXE")
-   (set_attr "type" "fsqrt<mode>")])
+   sq<xde>b\t%0,%1
+   wfsqdb\t%v0,%v1"
+  [(set_attr "op_type"      "RRE,RXE,VRR")
+   (set_attr "type"         "fsqrt<mode>")
+   (set_attr "cpu_facility" "*,*,vec")])
 
 
 ;;
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c 
b/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c
new file mode 100644
index 0000000..9f3e2b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c
@@ -0,0 +1,49 @@
+/* Check that we use the scalar variants of vector compares.  */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+
+/* { dg-final { scan-assembler-times "wfcedbs\t%v0,%v0,%v2" 2 } } */
+/* { dg-final { scan-assembler-times "wfchdbs\t%v0,%v0,%v2" 1 } } */
+/* { dg-final { scan-assembler-times "wfchedbs\t%v0,%v2,%v0" 1 } } */
+/* { dg-final { scan-assembler-times "wfchdbs\t%v0,%v2,%v0" 1 } } */
+/* { dg-final { scan-assembler-times "wfchedbs\t%v0,%v2,%v0" 1 } } */
+/* { dg-final { scan-assembler-times "je" 5 } } */
+/* { dg-final { scan-assembler-times "jo" 1 } } */
+
+
+double
+eq (double a, double b)
+{
+  return a == b;
+}
+
+double
+ne (double a, double b)
+{
+  return a != b;
+}
+
+double
+gt (double a, double b)
+{
+  return a > b;
+}
+
+double
+ge (double a, double b)
+{
+  return a >= b;
+}
+
+double
+lt (double a, double b)
+{
+  return a < b;
+}
+
+double
+le (double a, double b)
+{
+  return a <= b;
+}
-- 
1.7.9.5

[PATCH 07/13] S/390 Add vector scalar instruction support.

Reply via email to