Add count leading/trailing zeros support for vector single element
128-bit integers utilizing the new instructions vclzq and vctzq.
Furthermore, add scalar 64-bit integer support utilizing the new
instructions clzg and ctzg. For ctzg, also define the resulting value
(64, via CTZ_DEFINED_VALUE_AT_ZERO) if the input operand equals zero.
gcc/ChangeLog:
* config/s390/s390-builtins.def (s390_vec_cntlz): Add 128-bit
integer overloads.
(s390_vclzq): Add.
(s390_vec_cnttz): Add 128-bit integer overloads.
(s390_vctzq): Add.
* config/s390/s390-builtin-types.def: Update accordingly.
* config/s390/s390.h (CTZ_DEFINED_VALUE_AT_ZERO): Define.
* config/s390/s390.md (*clzg): New insn.
(clzdi2): Exploit new insn for target arch15.
(ctzdi2): New insn.
* config/s390/vector.md (clz<mode>2): Extend modes including
128-bit integer.
(ctz<mode>2): Likewise.
---
gcc/config/s390/s390-builtin-types.def | 1 +
gcc/config/s390/s390-builtins.def | 10 +++++--
gcc/config/s390/s390.h | 3 ++
gcc/config/s390/s390.md | 40 ++++++++++++++++++++------
gcc/config/s390/vector.md | 15 ++++++----
5 files changed, 52 insertions(+), 17 deletions(-)
diff --git a/gcc/config/s390/s390-builtin-types.def
b/gcc/config/s390/s390-builtin-types.def
index f0561839309..6f903deb745 100644
--- a/gcc/config/s390/s390-builtin-types.def
+++ b/gcc/config/s390/s390-builtin-types.def
@@ -610,6 +610,7 @@ DEF_OV_TYPE (BT_OV_UV1TI_UV2DI, BT_UV1TI, BT_UV2DI)
DEF_OV_TYPE (BT_OV_UV1TI_UV2DI_UV2DI, BT_UV1TI, BT_UV2DI, BT_UV2DI)
DEF_OV_TYPE (BT_OV_UV1TI_UV2DI_UV2DI_UV1TI, BT_UV1TI, BT_UV2DI, BT_UV2DI,
BT_UV1TI)
DEF_OV_TYPE (BT_OV_UV1TI_UV4SI_UV4SI, BT_UV1TI, BT_UV4SI, BT_UV4SI)
+DEF_OV_TYPE (BT_OV_UV1TI_V1TI, BT_UV1TI, BT_V1TI)
DEF_OV_TYPE (BT_OV_UV2DI_BV2DI_UV2DI, BT_UV2DI, BT_BV2DI, BT_UV2DI)
DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGCONSTPTR, BT_UV2DI, BT_LONG,
BT_ULONGLONGCONSTPTR)
DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG, BT_UV2DI, BT_ULONGLONG)
diff --git a/gcc/config/s390/s390-builtins.def
b/gcc/config/s390/s390-builtins.def
index 2cf443f6cdb..8eb07e6c79d 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -1639,7 +1639,7 @@ B_DEF (vec_cmpltuv2di, vec_cmpltuv2di,
0,
B_DEF (vec_cmpltv4sf, vec_cmpltv4sf_quiet_nocc,0,
B_INT | B_VXE, 0, BT_FN_V4SI_V4SF_V4SF)
B_DEF (vec_cmpltv2df, vec_cmpltv2df_quiet_nocc,0,
B_INT | B_VX, 0, BT_FN_V2DI_V2DF_V2DF)
-OB_DEF (s390_vec_cntlz, s390_vec_cntlz_s8,
s390_vec_cntlz_u64, B_VX, BT_FN_OV4SI_OV4SI)
+OB_DEF (s390_vec_cntlz, s390_vec_cntlz_s8,
s390_vec_cntlz_u128,B_VX, BT_FN_OV4SI_OV4SI)
OB_DEF_VAR (s390_vec_cntlz_s8, s390_vclzb, 0,
0, BT_OV_UV16QI_V16QI)
OB_DEF_VAR (s390_vec_cntlz_u8, s390_vclzb, 0,
0, BT_OV_UV16QI_UV16QI)
OB_DEF_VAR (s390_vec_cntlz_s16, s390_vclzh, 0,
0, BT_OV_UV8HI_V8HI)
@@ -1648,13 +1648,16 @@ OB_DEF_VAR (s390_vec_cntlz_s32, s390_vclzf,
0,
OB_DEF_VAR (s390_vec_cntlz_u32, s390_vclzf, 0,
0, BT_OV_UV4SI_UV4SI)
OB_DEF_VAR (s390_vec_cntlz_s64, s390_vclzg, 0,
0, BT_OV_UV2DI_V2DI)
OB_DEF_VAR (s390_vec_cntlz_u64, s390_vclzg, 0,
0, BT_OV_UV2DI_UV2DI)
+OB_DEF_VAR (s390_vec_cntlz_s128, s390_vclzq, B_VXE3,
0, BT_OV_UV1TI_V1TI)
+OB_DEF_VAR (s390_vec_cntlz_u128, s390_vclzq, B_VXE3,
0, BT_OV_UV1TI_UV1TI)
B_DEF (s390_vclzb, clzv16qi2, 0,
B_VX, 0, BT_FN_UV16QI_UV16QI)
B_DEF (s390_vclzh, clzv8hi2, 0,
B_VX, 0, BT_FN_UV8HI_UV8HI)
B_DEF (s390_vclzf, clzv4si2, 0,
B_VX, 0, BT_FN_UV4SI_UV4SI)
B_DEF (s390_vclzg, clzv2di2, 0,
B_VX, 0, BT_FN_UV2DI_UV2DI)
+B_DEF (s390_vclzq, clzti2, 0,
B_VXE3, 0, BT_FN_UINT128_UINT128)
-OB_DEF (s390_vec_cnttz, s390_vec_cnttz_s8,
s390_vec_cnttz_u64, B_VX, BT_FN_OV4SI_OV4SI)
+OB_DEF (s390_vec_cnttz, s390_vec_cnttz_s8,
s390_vec_cnttz_u128,B_VX, BT_FN_OV4SI_OV4SI)
OB_DEF_VAR (s390_vec_cnttz_s8, s390_vctzb, 0,
0, BT_OV_UV16QI_V16QI)
OB_DEF_VAR (s390_vec_cnttz_u8, s390_vctzb, 0,
0, BT_OV_UV16QI_UV16QI)
OB_DEF_VAR (s390_vec_cnttz_s16, s390_vctzh, 0,
0, BT_OV_UV8HI_V8HI)
@@ -1663,11 +1666,14 @@ OB_DEF_VAR (s390_vec_cnttz_s32, s390_vctzf,
0,
OB_DEF_VAR (s390_vec_cnttz_u32, s390_vctzf, 0,
0, BT_OV_UV4SI_UV4SI)
OB_DEF_VAR (s390_vec_cnttz_s64, s390_vctzg, 0,
0, BT_OV_UV2DI_V2DI)
OB_DEF_VAR (s390_vec_cnttz_u64, s390_vctzg, 0,
0, BT_OV_UV2DI_UV2DI)
+OB_DEF_VAR (s390_vec_cnttz_s128, s390_vctzq, B_VXE3,
0, BT_OV_UV1TI_V1TI)
+OB_DEF_VAR (s390_vec_cnttz_u128, s390_vctzq, B_VXE3,
0, BT_OV_UV1TI_UV1TI)
B_DEF (s390_vctzb, ctzv16qi2, 0,
B_VX, 0, BT_FN_UV16QI_UV16QI)
B_DEF (s390_vctzh, ctzv8hi2, 0,
B_VX, 0, BT_FN_UV8HI_UV8HI)
B_DEF (s390_vctzf, ctzv4si2, 0,
B_VX, 0, BT_FN_UV4SI_UV4SI)
B_DEF (s390_vctzg, ctzv2di2, 0,
B_VX, 0, BT_FN_UV2DI_UV2DI)
+B_DEF (s390_vctzq, ctzti2, 0,
B_VXE3, 0, BT_FN_UINT128_UINT128)
OB_DEF (s390_vec_xor, s390_vec_xor_b8,
s390_vec_xor_dbl_c, B_VX, BT_FN_OV4SI_OV4SI_OV4SI)
OB_DEF_VAR (s390_vec_xor_b8, s390_vx, 0,
0, BT_OV_BV16QI_BV16QI_BV16QI)
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 2f5b95fa2b8..957877b6a38 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -1034,6 +1034,9 @@ do {
\
/* Specify the value which is used when clz operand is zero. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1)
+/* Specify the value which is used when ctz operand is zero. */
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 64, 1)
+
/* Machine-specific symbol_ref flags. */
#define SYMBOL_FLAG_ALIGN_SHIFT SYMBOL_FLAG_MACH_DEP_SHIFT
#define SYMBOL_FLAG_ALIGN_MASK \
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 7b5b9709f56..03bd85e1398 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -9479,21 +9479,31 @@
(clz:DI (match_operand:DI 1 "register_operand" "d")))]
"TARGET_EXTIMM && TARGET_ZARCH"
{
- rtx_insn *insn;
- rtx clz_equal;
- rtx wide_reg = gen_reg_rtx (TImode);
- rtx msb = gen_rtx_CONST_INT (DImode, HOST_WIDE_INT_1U << 63);
+ if (!(TARGET_ARCH15 && TARGET_64BIT))
+ {
+ rtx_insn *insn;
+ rtx clz_equal;
+ rtx wide_reg = gen_reg_rtx (TImode);
+ rtx msb = gen_rtx_CONST_INT (DImode, HOST_WIDE_INT_1U << 63);
- clz_equal = gen_rtx_CLZ (DImode, operands[1]);
+ clz_equal = gen_rtx_CLZ (DImode, operands[1]);
- emit_insn (gen_clztidi2 (wide_reg, operands[1], msb));
+ emit_insn (gen_clztidi2 (wide_reg, operands[1], msb));
- insn = emit_move_insn (operands[0], gen_highpart (DImode, wide_reg));
- set_unique_reg_note (insn, REG_EQUAL, clz_equal);
+ insn = emit_move_insn (operands[0], gen_highpart (DImode, wide_reg));
+ set_unique_reg_note (insn, REG_EQUAL, clz_equal);
- DONE;
+ DONE;
+ }
})
+(define_insn "*clzg"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (clz:DI (match_operand:DI 1 "register_operand" "d")))]
+ "TARGET_ARCH15 && TARGET_64BIT"
+ "clzg\t%0,%1"
+ [(set_attr "op_type" "RRE")])
+
; CLZ result is in hard reg op0 - this is the high part of the target operand
; The source with the left-most one bit cleared is in hard reg op0 + 1 - the
low part
(define_insn "clztidi2"
@@ -9512,6 +9522,18 @@
[(set_attr "op_type" "RRE")])
+;;
+;; Count Trailing Zeros.
+;;
+
+(define_insn "ctzdi2"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (ctz:DI (match_operand:DI 1 "register_operand" "d")))]
+ "TARGET_ARCH15 && TARGET_64BIT"
+ "ctzg\t%0,%1"
+ [(set_attr "op_type" "RRE")])
+
+
;;
;;- Rotate instructions.
;;
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 06641bfcc7b..2e7419c45c3 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -25,6 +25,9 @@
(define_mode_iterator VT
[V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF
V2SF V4SF V1DF V2DF V1TF V1TI TI])
+(define_mode_iterator VT_VXE3
+ [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V1SF
+ V2SF V4SF V1DF V2DF V1TF (V1TI "TARGET_VXE3") (TI "TARGET_VXE3")])
; All modes directly supported by the hardware having full vector reg size
(define_mode_iterator V_HW [V16QI V8HI V4SI V2DI V1TI TI V2DF
@@ -1369,19 +1372,19 @@
})
; Count leading zeros
-; vclzb, vclzh, vclzf, vclzg
+; vclzb, vclzh, vclzf, vclzg, vclzq
(define_insn "clz<mode>2"
- [(set (match_operand:V 0 "register_operand" "=v")
- (clz:V (match_operand:V 1 "register_operand" "v")))]
+ [(set (match_operand:VT_VXE3 0 "register_operand" "=v")
+ (clz:VT_VXE3 (match_operand:VT_VXE3 1 "register_operand" "v")))]
"TARGET_VX"
"vclz<bhfgq>\t%v0,%v1"
[(set_attr "op_type" "VRR")])
; Count trailing zeros
-; vctzb, vctzh, vctzf, vctzg
+; vctzb, vctzh, vctzf, vctzg, vctzq
(define_insn "ctz<mode>2"
- [(set (match_operand:V 0 "register_operand" "=v")
- (ctz:V (match_operand:V 1 "register_operand" "v")))]
+ [(set (match_operand:VT_VXE3 0 "register_operand" "=v")
+ (ctz:VT_VXE3 (match_operand:VT_VXE3 1 "register_operand" "v")))]
"TARGET_VX"
"vctz<bhfgq>\t%v0,%v1"
[(set_attr "op_type" "VRR")])
--
2.47.0