[PATCH v2, PR target/94954] Fix wrong codegen for vec_pack_to_short_fp32()
builtin
Hi,
Fix codegen for builtin vec_pack_to_short_fp32. This includes adding
a define_insn for xvcvsphp, and adding a new define_expand for
convert_4f32_8f16.
[v2]
Enhanced the comment on "convert_4f32_8f16" in altivec.md.
Updated testsuite builtins-1-p9-runnable.c with an additional description
of the built-in and improved the target statements.
OK for trunk and backports?
Thanks
-Will
[gcc]
PR target/94954
* config/rs6000/altivec.h (vec_pack_to_short_fp32): Update.
* config/rs6000/altivec.md (UNSPEC_CONVERT_4F32_8F16): New unspec.
(convert_4f32_8f16): New define_expand.
* config/rs6000/rs6000-builtin.def (convert_4f32_8f16): New builtin define
and overload.
* config/rs6000/rs6000-call.c (P9V_BUILTIN_VEC_CONVERT_4F32_8F16): New
overloaded builtin entry.
* config/rs6000/vsx.md (UNSPEC_VSX_XVCVSPHP): New unspec.
(vsx_xvcvsphp): New define_insn.
[testsuite]
* testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c: Update.
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 0a7e8ab..ab10025 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -431,11 +431,11 @@
/* Vector additions added in ISA 3.0. */
#define vec_first_match_index __builtin_vec_first_match_index
#define vec_first_match_or_eos_index __builtin_vec_first_match_or_eos_index
#define vec_first_mismatch_index __builtin_vec_first_mismatch_index
#define vec_first_mismatch_or_eos_index __builtin_vec_first_mismatch_or_eos_index
-#define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8i16
+#define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8f16
#define vec_parity_lsbb __builtin_vec_vparity_lsbb
#define vec_vctz __builtin_vec_vctz
#define vec_cnttz __builtin_vec_vctz
#define vec_vctzb __builtin_vec_vctzb
#define vec_vctzd __builtin_vec_vctzd
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 159f24e..5ce54c8 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -78,10 +78,11 @@
UNSPEC_VUNPACK_HI_SIGN_DIRECT
UNSPEC_VUNPACK_LO_SIGN_DIRECT
UNSPEC_VUPKHPX
UNSPEC_VUPKLPX
UNSPEC_CONVERT_4F32_8I16
+ UNSPEC_CONVERT_4F32_8F16
UNSPEC_DST
UNSPEC_DSTT
UNSPEC_DSTST
UNSPEC_DSTSTT
UNSPEC_LVSL
@@ -3215,10 +3216,43 @@
emit_insn (gen_altivec_vctuxs (rtx_tmp_lo, operands[2], const0_rtx));
emit_insn (gen_altivec_vpkswss (operands[0], rtx_tmp_hi, rtx_tmp_lo));
DONE;
})
+
+;; Convert two vector F32 to packed vector F16.
+;; This builtin packs 32-bit floating-point values into packed
+;; 16-bit floating-point values (stored in a 16-bit integer type):
+;; vector unsigned short r = vec_pack_to_short_fp32 (a, b);
+;; The expected codegen for this builtin is
+;; xvcvsphp t, a
+;; xvcvsphp u, b
+;; if (little endian)
+;; vpkuwum r, t, u
+;; else
+;; vpkuwum r, u, t
+
+(define_expand "convert_4f32_8f16"
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
+ (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "v")
+ (match_operand:V4SF 2 "register_operand" "v")]
+ UNSPEC_CONVERT_4F32_8F16))]
+ "TARGET_P9_VECTOR"
+{
+ rtx rtx_tmp_hi = gen_reg_rtx (V4SImode);
+ rtx rtx_tmp_lo = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vsx_xvcvsphp (rtx_tmp_hi, operands[1]));
+ emit_insn (gen_vsx_xvcvsphp (rtx_tmp_lo, operands[2]));
+ if (!BYTES_BIG_ENDIAN)
+ emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_hi, rtx_tmp_lo));
+ else
+ emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_lo, rtx_tmp_hi));
+ DONE;
+})
+
+
;; Generate
;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0
;; vsubu?m SCRATCH2,SCRATCH1,%1
;; vmaxs? %0,%1,SCRATCH2"
(define_expand "abs<mode>2"
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 8b1ddb0..47e9137 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2208,10 +2208,11 @@ BU_P8V_OVERLOAD_3 (VPERMXOR, "vpermxor")
/* ISA 3.0 vector overloaded 2-argument functions. */
BU_P9V_AV_2 (VSLV, "vslv", CONST, vslv)
BU_P9V_AV_2 (VSRV, "vsrv", CONST, vsrv)
BU_P9V_AV_2 (CONVERT_4F32_8I16, "convert_4f32_8i16", CONST, convert_4f32_8i16)
+BU_P9V_AV_2 (CONVERT_4F32_8F16, "convert_4f32_8f16", CONST, convert_4f32_8f16)
BU_P9V_AV_2 (VFIRSTMATCHINDEX_V16QI, "first_match_index_v16qi",
CONST, first_match_index_v16qi)
BU_P9V_AV_2 (VFIRSTMATCHINDEX_V8HI, "first_match_index_v8hi",
CONST, first_match_index_v8hi)
@@ -2238,10 +2239,11 @@ BU_P9V_AV_2 (VFIRSTMISMATCHOREOSINDEX_V4SI, "first_mismatch_or_eos_index_v4si",
/* ISA 3.0 vector overloaded 2-argument functions. */
BU_P9V_OVERLOAD_2 (VSLV, "vslv")
BU_P9V_OVERLOAD_2 (VSRV, "vsrv")
BU_P9V_OVERLOAD_2 (CONVERT_4F32_8I16, "convert_4f32_8i16")
+BU_P9V_OVERLOAD_2 (CONVERT_4F32_8F16, "convert_4f32_8f16")
/* 2 argument vector functions added in ISA 3.0 (power9). */
BU_P9V_AV_2 (VADUB, "vadub", CONST, vaduv16qi3)
BU_P9V_AV_2 (VADUH, "vaduh", CONST, vaduv8hi3)
BU_P9V_AV_2 (VADUW, "vaduw", CONST, vaduv4si3)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 0ac8054..9708d7e 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -1975,10 +1975,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ P8V_BUILTIN_VEC_NEG, P8V_BUILTIN_NEG_V2DF,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
{ P9V_BUILTIN_VEC_CONVERT_4F32_8I16, P9V_BUILTIN_CONVERT_4F32_8I16,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P9V_BUILTIN_VEC_CONVERT_4F32_8F16, P9V_BUILTIN_CONVERT_4F32_8F16,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V16QI,
RS6000_BTI_UINTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
{ P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V16QI,
RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0 },
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 2a28215..da67b3a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -295,10 +295,11 @@
UNSPEC_VSX_DIVSD
UNSPEC_VSX_DIVUD
UNSPEC_VSX_MULSD
UNSPEC_VSX_SIGN_EXTEND
UNSPEC_VSX_XVCVSPSXDS
+ UNSPEC_VSX_XVCVSPHP
UNSPEC_VSX_VSLO
UNSPEC_VSX_EXTRACT
UNSPEC_VSX_SXEXPDP
UNSPEC_VSX_SXSIG
UNSPEC_VSX_SIEXPDP
@@ -2177,10 +2178,19 @@
UNSPEC_VSX_CVHPSP))]
"TARGET_P9_VECTOR"
"xvcvhpsp %x0,%x1"
[(set_attr "type" "vecfloat")])
+;; Generate xvcvsphp
+(define_insn "vsx_xvcvsphp"
+ [(set (match_operand:V4SI 0 "register_operand" "=wa")
+ (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_XVCVSPHP))]
+ "TARGET_P9_VECTOR"
+ "xvcvsphp %x0,%x1"
+[(set_attr "type" "vecfloat")])
+
;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
(define_insn "vsx_xscvdpsp_scalar"
[(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c
index 0e4ab48..e08e596 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c
@@ -1,25 +1,50 @@
-/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */
-/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-do run { target { powerpc*-*-linux* && { p9vector_hw } } } } */
+/* { dg-require-effective-target p9vector_hw } */
/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
#include <altivec.h>
+#include <stdio.h>
void abort (void);
int main() {
int i;
vector float vfa, vfb;
- vector unsigned short vur, vuexpt;
+ vector unsigned short vresult, vexpected;
- vfa = (vector float){3.4, 5.0, 20.0, 50.9 };
- vfb = (vector float){10.0, 40.0, 70.0, 100.0 };
- vuexpt = (vector unsigned short){ 3, 5, 20, 50,
- 10, 40, 70, 100};
+ vfa = (vector float){0.4, 1.6, 20.0, 99.9 };
+ vfb = (vector float){10.0, -2.0, 70.0, 999.0 };
- vur = vec_pack_to_short_fp32 (vfa, vfb);
+ /* Expected results. */
+ vexpected = (vector unsigned short) { 0x3666, 0x3e66, 0x4d00, 0x563e,
+ 0x4900, 0xc000, 0x5460, 0x63ce};
+
+/*
+ vresult = vec_pack_to_short_fp32 (vfa, vfb);
+ This built-in converts a pair of vector floats into a single vector of
+ packed half-precision (F16) values. The result type is a vector of
+ unsigned shorts.
+ The expected codegen for this builtin is
+ xvcvsphp t, vfa
+ xvcvsphp u, vfb
+ if (little endian)
+ vpkuwum vresult, t, u
+ else
+ vpkuwum vresult, u, t
+*/
+
+ vresult = vec_pack_to_short_fp32 (vfa, vfb);
+
+#ifdef DEBUG
+ for(i = 0; i< 4; i++) { printf("i=[%d] %f \n",i,vfa[i]); }
+ for(i = 0; i< 4; i++) { printf("i=[%d] %f \n",i+4,vfb[i]); }
+ for(i = 0; i< 8; i++) { printf("i=[%d] %d \n",i,vresult[i]); }
+#endif
for(i = 0; i< 8; i++) {
- if (vur[i] != vuexpt[i])
+ if (vresult[i] != vexpected[i]) {
+ printf("i=[%d] 0x%x != 0x%x \n",i,vresult[i],vexpected[i]);
abort();
+ }
}
}