Segher, Will: Just wanted to ping you both on this patch. It has been out there for a while.
Carl On Mon, 2020-12-07 at 16:31 -0800, Carl Love wrote: > Will: > > I have addressed your comments with regard to the Change Log > entries. > > The extra define vec_div was removed. > > Added the missing entries for DIVU_V2DI and DIVS_V2DI in rs6000-call.c. > > The extra MULLD_V2DI case statement entry was removed. > > Added comment in rs6000.md about size for vector types per discussion > with Pat. > > Carl > -------------------------------------------------------- > > GCC maintainers: > > The following patch adds new builtins for the vector integer > multiply, > divide and modulo operations. The builtins are: vec_mulh(), > vec_dive(), vec_mod() for signed and unsigned integers and long > long integers. The existing support for the vec_div() and vec_mul() > builtins emulates the vector operations with multiple scalar > instructions. This patch adds support for these builtins using the > new > vector instructions for Power 10. > > The patch was compiled and tested on: > > powerpc64le-unknown-linux-gnu (Power 9 LE) > powerpc64le-unknown-linux-gnu (Power 10 LE) > > with no regressions. Additionally, the new test case was compiled and > executed by hand on Mambo to verify the test case passes. > > Please let me know if this patch is acceptable for mainline. Thanks. > > Carl Love > > ------------------------------------------------- > > From 15f9c090106c62af83cc405414466ad03d1a4c55 Mon Sep 17 00:00:00 > 2001 > From: Carl Love <c...@us.ibm.com> > Date: Fri, 4 Sep 2020 19:24:22 -0500 > Subject: [PATCH] rs6000, vector integer multiply/divide/modulo > instructions > > 2020-12-07 Carl Love <c...@us.ibm.com> > > gcc/ > * config/rs6000/altivec.h (vec_mulh, vec_dive, vec_mod): New > defines. > * config/rs6000/altivec.md (VIlong): Move define to file > vsx.md. 
> * config/rs6000/rs6000-builtin.def (DIVES_V4SI, DIVES_V2DI, > DIVEU_V4SI, DIVEU_V2DI, DIVS_V4SI, DIVS_V2DI, DIVU_V4SI, > DIVU_V2DI, MODS_V2DI, MODS_V4SI, MODU_V2DI, MODU_V4SI, > MULHS_V2DI, MULHS_V4SI, MULHU_V2DI, MULHU_V4SI, MULLD_V2DI): > Add builtin define. > (MULH, DIVE, MOD): Add new BU_P10_OVERLOAD_2 definitions. > * config/rs6000/rs6000-call.c (altivec_overloaded_builtins): > Add > VSX_BUILTIN_VEC_DIV, P10_BUILTIN_VEC_VDIVE, > P10_BUILTIN_VEC_VDIVE, P10_BUILTIN_VEC_VMOD, > P10_BUILTIN_VEC_VMULH > overloaded definitions. > (builtin_function_type) [P10V_BUILTIN_DIVEU_V4SI, > P10V_BUILTIN_DIVEU_V2DI, P10V_BUILTIN_DIVU_V4SI, > P10V_BUILTIN_DIVU_V2DI, P10V_BUILTIN_MODU_V2DI, > P10V_BUILTIN_MODU_V4SI, P10V_BUILTIN_MULHU_V2DI, > P10V_BUILTIN_MULHU_V4SI, P10V_BUILTIN_MULLD_V2DI]: Add case > statements for builtins. > * config/rs6000/rs6000.md (bits): Add new attribute sizes. > * config/rs6000/vsx.md (VIlong): New define_mode_iterator. > (UNSPEC_VDIVES, UNSPEC_VDIVEU): New unspec definitions. > (vsx_mul_v2di): Add if TARGET_POWER10 statement. > (vsx_udiv_v2di): Add if TARGET_POWER10 statement. > (dives_<mode>, diveu_<mode>, div<mode>3, uvdiv<mode>3, > mods_<mode>, modu_<mode>, mulhs_<mode>, mulhu_<mode>, > mulv2di3): > Add define_insn, mode is VIlong. > doc/extend.texi (vec_mulh, vec_mul, vec_div, vec_dive, > vec_mod): Add > builtin descriptions. > > gcc/testsuite/ > * gcc.target/powerpc/builtins-1-p10-runnable.c: New test file. 
> --- > gcc/config/rs6000/altivec.h | 4 + > gcc/config/rs6000/altivec.md | 2 - > gcc/config/rs6000/rs6000-builtin.def | 22 + > gcc/config/rs6000/rs6000-call.c | 53 +++ > gcc/config/rs6000/rs6000.md | 4 +- > gcc/config/rs6000/vsx.md | 212 +++++++--- > gcc/doc/extend.texi | 120 ++++++ > .../powerpc/builtins-1-p10-runnable.c | 398 > ++++++++++++++++++ > 8 files changed, 762 insertions(+), 53 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-1-p10- > runnable.c > > diff --git a/gcc/config/rs6000/altivec.h > b/gcc/config/rs6000/altivec.h > index e1884f51bd8..b678e5cf28d 100644 > --- a/gcc/config/rs6000/altivec.h > +++ b/gcc/config/rs6000/altivec.h > @@ -750,6 +750,10 @@ __altivec_scalar_pred(vec_any_nle, > #define vec_strir_p(a) __builtin_vec_strir_p (a) > #define vec_stril_p(a) __builtin_vec_stril_p (a) > > +#define vec_mulh(a, b) __builtin_vec_mulh ((a), (b)) > +#define vec_dive(a, b) __builtin_vec_dive ((a), (b)) > +#define vec_mod(a, b) __builtin_vec_mod ((a), (b)) > + > /* VSX Mask Manipulation builtin. 
*/ > #define vec_genbm __builtin_vec_mtvsrbm > #define vec_genhm __builtin_vec_mtvsrhm > diff --git a/gcc/config/rs6000/altivec.md > b/gcc/config/rs6000/altivec.md > index 6a6ce0f84ed..f10f1cdd8a7 100644 > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -193,8 +193,6 @@ > > ;; Short vec int modes > (define_mode_iterator VIshort [V8HI V16QI]) > -;; Longer vec int modes for rotate/mask ops > -(define_mode_iterator VIlong [V2DI V4SI]) > ;; Vec float modes > (define_mode_iterator VF [V4SF]) > ;; Vec modes, pity mode iterators are not composable > diff --git a/gcc/config/rs6000/rs6000-builtin.def > b/gcc/config/rs6000/rs6000-builtin.def > index 47b1f74e616..e9ea2114615 100644 > --- a/gcc/config/rs6000/rs6000-builtin.def > +++ b/gcc/config/rs6000/rs6000-builtin.def > @@ -2883,6 +2883,24 @@ BU_P10V_AV_3 (VSRDB_V8HI, "vsrdb_v8hi", CONST, > vsrdb_v8hi) > BU_P10V_AV_3 (VSRDB_V4SI, "vsrdb_v4si", CONST, vsrdb_v4si) > BU_P10V_AV_3 (VSRDB_V2DI, "vsrdb_v2di", CONST, vsrdb_v2di) > > +BU_P10V_AV_2 (DIVES_V4SI, "vdivesw", CONST, dives_v4si) > +BU_P10V_AV_2 (DIVES_V2DI, "vdivesd", CONST, dives_v2di) > +BU_P10V_AV_2 (DIVEU_V4SI, "vdiveuw", CONST, diveu_v4si) > +BU_P10V_AV_2 (DIVEU_V2DI, "vdiveud", CONST, diveu_v2di) > +BU_P10V_AV_2 (DIVS_V4SI, "vdivsw", CONST, divv4si3) > +BU_P10V_AV_2 (DIVS_V2DI, "vdivsd", CONST, divv2di3) > +BU_P10V_AV_2 (DIVU_V4SI, "vdivuw", CONST, udivv4si3) > +BU_P10V_AV_2 (DIVU_V2DI, "vdivud", CONST, udivv2di3) > +BU_P10V_AV_2 (MODS_V2DI, "vmodsd", CONST, mods_v2di) > +BU_P10V_AV_2 (MODS_V4SI, "vmodsw", CONST, mods_v4si) > +BU_P10V_AV_2 (MODU_V2DI, "vmodud", CONST, modu_v2di) > +BU_P10V_AV_2 (MODU_V4SI, "vmoduw", CONST, modu_v4si) > +BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, mulhs_v2di) > +BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, mulhs_v4si) > +BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, mulhu_v2di) > +BU_P10V_AV_2 (MULHU_V4SI, "vmulhuw", CONST, mulhu_v4si) > +BU_P10V_AV_2 (MULLD_V2DI, "vmulld", CONST, mulv2di3) > + > 
BU_P10V_VSX_1 (VXXSPLTIW_V4SI, "vxxspltiw_v4si", CONST, > xxspltiw_v4si) > BU_P10V_VSX_1 (VXXSPLTIW_V4SF, "vxxspltiw_v4sf", CONST, > xxspltiw_v4sf) > > @@ -2958,6 +2976,10 @@ BU_P10_OVERLOAD_1 (VSTRIL_P, "stril_p") > BU_P10_OVERLOAD_1 (XVTLSBB_ZEROS, "xvtlsbb_all_zeros") > BU_P10_OVERLOAD_1 (XVTLSBB_ONES, "xvtlsbb_all_ones") > > +BU_P10_OVERLOAD_2 (MULH, "mulh") > +BU_P10_OVERLOAD_2 (DIVE, "dive") > +BU_P10_OVERLOAD_2 (MOD, "mod") > + > > BU_P10_OVERLOAD_1 (MTVSRBM, "mtvsrbm") > BU_P10_OVERLOAD_1 (MTVSRHM, "mtvsrhm") > diff --git a/gcc/config/rs6000/rs6000-call.c > b/gcc/config/rs6000/rs6000-call.c > index 45bc048b5c7..da442e400b9 100644 > --- a/gcc/config/rs6000/rs6000-call.c > +++ b/gcc/config/rs6000/rs6000-call.c > @@ -1069,6 +1069,40 @@ const struct altivec_builtin_types > altivec_overloaded_builtins[] = { > RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, > { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_UDIV_V2DI, > RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, > RS6000_BTI_unsigned_V2DI, 0 }, > + > + { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIVS_V4SI, > + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, > + { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIVU_V4SI, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, > + RS6000_BTI_unsigned_V4SI, 0 }, > + { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIVS_V2DI, > + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, > + { VSX_BUILTIN_VEC_DIV, P10V_BUILTIN_DIVU_V2DI, > + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, > + RS6000_BTI_unsigned_V2DI, 0 }, > + > + { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVES_V4SI, > + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, > + { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVEU_V4SI, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, > + RS6000_BTI_unsigned_V4SI, 0 }, > + { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVES_V2DI, > + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, > + { P10_BUILTIN_VEC_DIVE, P10V_BUILTIN_DIVEU_V2DI, > + RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_unsigned_V2DI, > + RS6000_BTI_unsigned_V2DI, 0 }, > + > + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODS_V4SI, > + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, > + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODU_V4SI, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, > + RS6000_BTI_unsigned_V4SI, 0 }, > + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODS_V2DI, > + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, > + { P10_BUILTIN_VEC_MOD, P10V_BUILTIN_MODU_V2DI, > + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, > + RS6000_BTI_unsigned_V2DI, 0 }, > + > { VSX_BUILTIN_VEC_DOUBLE, VSX_BUILTIN_XVCVSXDDP, > RS6000_BTI_V2DF, RS6000_BTI_V2DI, 0, 0 }, > { VSX_BUILTIN_VEC_DOUBLE, VSX_BUILTIN_XVCVUXDDP, > @@ -1909,6 +1943,17 @@ const struct altivec_builtin_types > altivec_overloaded_builtins[] = { > RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI, > RS6000_BTI_unsigned_V16QI, 0 }, > { ALTIVEC_BUILTIN_VEC_VMINUB, ALTIVEC_BUILTIN_VMINUB, > RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, > RS6000_BTI_bool_V16QI, 0 }, > + { P10_BUILTIN_VEC_MULH, P10V_BUILTIN_MULHS_V4SI, > + RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 }, > + { P10_BUILTIN_VEC_MULH, P10V_BUILTIN_MULHU_V4SI, > + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, > + RS6000_BTI_unsigned_V4SI, 0 }, > + { P10_BUILTIN_VEC_MULH, P10V_BUILTIN_MULHS_V2DI, > + RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 }, > + { P10_BUILTIN_VEC_MULH, P10V_BUILTIN_MULHU_V2DI, > + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, > + RS6000_BTI_unsigned_V2DI, 0 }, > + > { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULEUB, > RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, > RS6000_BTI_unsigned_V16QI, 0 }, > { ALTIVEC_BUILTIN_VEC_MULE, ALTIVEC_BUILTIN_VMULESB, > @@ -14438,6 +14483,14 @@ builtin_function_type (machine_mode > mode_ret, machine_mode mode_arg0, > case P10V_BUILTIN_XXGENPCVM_V8HI: > case P10V_BUILTIN_XXGENPCVM_V4SI: > case P10V_BUILTIN_XXGENPCVM_V2DI: > + case 
P10V_BUILTIN_DIVEU_V4SI: > + case P10V_BUILTIN_DIVEU_V2DI: > + case P10V_BUILTIN_DIVU_V4SI: > + case P10V_BUILTIN_DIVU_V2DI: > + case P10V_BUILTIN_MODU_V2DI: > + case P10V_BUILTIN_MODU_V4SI: > + case P10V_BUILTIN_MULHU_V2DI: > + case P10V_BUILTIN_MULHU_V4SI: > h.uns_p[0] = 1; > h.uns_p[1] = 1; > h.uns_p[2] = 1; > diff --git a/gcc/config/rs6000/rs6000.md > b/gcc/config/rs6000/rs6000.md > index b89990f46bf..7dea1dfb1d5 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -669,8 +669,10 @@ > (V2DI "d")]) > > ;; How many bits in this mode? > +;; For vector type, bits is the size of the elmement > (define_mode_attr bits [(QI "8") (HI "16") (SI "32") (DI "64") > - (SF "32") (DF "64")]) > + (SF "32") (DF "64") > + (V4SI "32") (V2DI "64")]) > > ; DImode bits > (define_mode_attr dbits [(QI "56") (HI "48") (SI "32")]) > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index 947631d83ee..950f655cdb7 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -267,6 +267,10 @@ > (define_mode_iterator VSX_MM [V16QI V8HI V4SI V2DI V1TI]) > (define_mode_iterator VSX_MM4 [V16QI V8HI V4SI V2DI]) > > +;; Longer vec int modes for rotate/mask ops > +;; and Vector Integer Multiply/Divide/Modulo Instructions > +(define_mode_iterator VIlong [V2DI V4SI]) > + > ;; Constants for creating unspecs > (define_c_enum "unspec" > [UNSPEC_VSX_CONCAT > @@ -363,8 +367,11 @@ > UNSPEC_INSERTR > UNSPEC_REPLACE_ELT > UNSPEC_REPLACE_UN > + UNSPEC_VDIVES > + UNSPEC_VDIVEU > ]) > > + > (define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16 > UNSPEC_VSX_XVCVBF16SPN]) > > @@ -1623,28 +1630,35 @@ > rtx op0 = operands[0]; > rtx op1 = operands[1]; > rtx op2 = operands[2]; > - rtx op3 = gen_reg_rtx (DImode); > - rtx op4 = gen_reg_rtx (DImode); > - rtx op5 = gen_reg_rtx (DImode); > - emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); > - emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); > - if (TARGET_POWERPC64) > - emit_insn 
(gen_muldi3 (op5, op3, op4)); > - else > - { > - rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); > - emit_move_insn (op5, ret); > - } > - emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); > - emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); > - if (TARGET_POWERPC64) > - emit_insn (gen_muldi3 (op3, op3, op4)); > + > + if (TARGET_POWER10) > + emit_insn (gen_mulv2di3 (op0, op1, op2) ); > + > else > { > - rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); > - emit_move_insn (op3, ret); > + rtx op3 = gen_reg_rtx (DImode); > + rtx op4 = gen_reg_rtx (DImode); > + rtx op5 = gen_reg_rtx (DImode); > + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); > + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); > + if (TARGET_POWERPC64) > + emit_insn (gen_muldi3 (op5, op3, op4)); > + else > + { > + rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); > + emit_move_insn (op5, ret); > + } > + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); > + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); > + if (TARGET_POWERPC64) > + emit_insn (gen_muldi3 (op3, op3, op4)); > + else > + { > + rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false); > + emit_move_insn (op3, ret); > + } > + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); > } > - emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); > DONE; > } > [(set_attr "type" "mul")]) > @@ -1718,37 +1732,46 @@ > rtx op0 = operands[0]; > rtx op1 = operands[1]; > rtx op2 = operands[2]; > - rtx op3 = gen_reg_rtx (DImode); > - rtx op4 = gen_reg_rtx (DImode); > - rtx op5 = gen_reg_rtx (DImode); > - emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); > - emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); > - if (TARGET_POWERPC64) > - emit_insn (gen_udivdi3 (op5, op3, op4)); > - else > - { > - rtx libfunc = optab_libfunc (udiv_optab, DImode); > - rtx target = emit_library_call_value (libfunc, > - op5, LCT_NORMAL, DImode, > - op3, DImode, > - op4, DImode); > - 
emit_move_insn (op5, target); > - } > - emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); > - emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); > - if (TARGET_POWERPC64) > - emit_insn (gen_udivdi3 (op3, op3, op4)); > - else > - { > - rtx libfunc = optab_libfunc (udiv_optab, DImode); > - rtx target = emit_library_call_value (libfunc, > - op3, LCT_NORMAL, DImode, > - op3, DImode, > - op4, DImode); > - emit_move_insn (op3, target); > - } > - emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); > - DONE; > + > + if (TARGET_POWER10) > + emit_insn (gen_udivv2di3 (op0, op1, op2) ); > + else > + { > + rtx op3 = gen_reg_rtx (DImode); > + rtx op4 = gen_reg_rtx (DImode); > + rtx op5 = gen_reg_rtx (DImode); > + > + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); > + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); > + > + if (TARGET_POWERPC64) > + emit_insn (gen_udivdi3 (op5, op3, op4)); > + else > + { > + rtx libfunc = optab_libfunc (udiv_optab, DImode); > + rtx target = emit_library_call_value (libfunc, > + op5, LCT_NORMAL, > DImode, > + op3, DImode, > + op4, DImode); > + emit_move_insn (op5, target); > + } > + emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); > + emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); > + > + if (TARGET_POWERPC64) > + emit_insn (gen_udivdi3 (op3, op3, op4)); > + else > + { > + rtx libfunc = optab_libfunc (udiv_optab, DImode); > + rtx target = emit_library_call_value (libfunc, > + op3, LCT_NORMAL, > DImode, > + op3, DImode, > + op4, DImode); > + emit_move_insn (op3, target); > + } > + emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); > + } > + DONE; > } > [(set_attr "type" "div")]) > > @@ -6104,3 +6127,92 @@ > "TARGET_POWER10" > "vexpand<wd>m %0,%1" > [(set_attr "type" "vecsimple")]) > + > +(define_insn "dives_<mode>" > + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") > + (unspec:VIlong [(match_operand:VIlong 1 > "vsx_register_operand" "v") > + (match_operand:VIlong 2 "vsx_register_operand" 
> "v")] > + UNSPEC_VDIVES))] > + "TARGET_POWER10" > + "vdives<wd> %0,%1,%2" > + [(set_attr "type" "vecdiv") > + (set_attr "size" "<bits>")]) > + > +(define_insn "diveu_<mode>" > + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") > + (unspec: VIlong [(match_operand:VIlong 1 > "vsx_register_operand" "v") > + (match_operand:VIlong 2 "vsx_register_operand" > "v")] > + UNSPEC_VDIVEU))] > + "TARGET_POWER10" > + "vdiveu<wd> %0,%1,%2" > + [(set_attr "type" "vecdiv") > + (set_attr "size" "<bits>")]) > + > +(define_insn "div<mode>3" > + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") > + (div:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") > + (match_operand:VIlong 2 "vsx_register_operand" > "v")))] > + "TARGET_POWER10" > + "vdivs<wd> %0,%1,%2" > + [(set_attr "type" "vecdiv") > + (set_attr "size" "<bits>")]) > + > +(define_insn "udiv<mode>3" > + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") > + (udiv:VIlong (match_operand:VIlong 1 "vsx_register_operand" > "v") > + (match_operand:VIlong 2 "vsx_register_operand" > "v")))] > + "TARGET_POWER10" > + "vdivu<wd> %0,%1,%2" > + [(set_attr "type" "vecdiv") > + (set_attr "size" "<bits>")]) > + > +(define_insn "mods_<mode>" > + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") > + (mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v") > + (match_operand:VIlong 2 "vsx_register_operand" > "v")))] > + "TARGET_POWER10" > + "vmods<wd> %0,%1,%2" > + [(set_attr "type" "vecdiv") > + (set_attr "size" "<bits>")]) > + > +(define_insn "modu_<mode>" > + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") > + (umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" > "v") > + (match_operand:VIlong 2 "vsx_register_operand" > "v")))] > + "TARGET_POWER10" > + "vmodu<wd> %0,%1,%2" > + [(set_attr "type" "vecdiv") > + (set_attr "size" "<bits>")]) > + > +(define_insn "mulhs_<mode>" > + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") > + (mult:VIlong (ashiftrt > + 
(match_operand:VIlong 1 "vsx_register_operand" > "v") > + (const_int 32)) > + (ashiftrt > + (match_operand:VIlong 2 "vsx_register_operand" > "v") > + (const_int 32))))] > + "TARGET_POWER10" > + "vmulhs<wd> %0,%1,%2" > + [(set_attr "type" "veccomplex")]) > + > +(define_insn "mulhu_<mode>" > + [(set (match_operand:VIlong 0 "vsx_register_operand" "=v") > + (us_mult:VIlong (ashiftrt > + (match_operand:VIlong 1 > "vsx_register_operand" "v") > + (const_int 32)) > + (ashiftrt > + (match_operand:VIlong 2 > "vsx_register_operand" "v") > + (const_int 32))))] > + "TARGET_POWER10" > + "vmulhu<wd> %0,%1,%2" > + [(set_attr "type" "veccomplex")]) > + > +;; Vector multiply low double word > +(define_insn "mulv2di3" > + [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") > + (mult:V2DI (match_operand:V2DI 1 "vsx_register_operand" "v") > + (match_operand:V2DI 2 "vsx_register_operand" "v")))] > + "TARGET_POWER10" > + "vmulld %0,%1,%2" > + [(set_attr "type" "veccomplex")]) > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi > index 0c969085d1f..3c2d2fa892f 100644 > --- a/gcc/doc/extend.texi > +++ b/gcc/doc/extend.texi > @@ -21642,6 +21642,126 @@ integer value between 0 and 255 inclusive. > @exdent vector unsigned int vec_genpcvm (vector unsigned long long > int, > const int) > @end smallexample > + > +Vector Integer Multiply/Divide/Modulo > + > +@smallexample > +@exdent vector signed int > +@exdent vec_mulh (vector signed int a, vector signed int b) > +@exdent vector unsigned int > +@exdent vec_mulh (vector unsigned int a, vector unsigned int b) > +@end smallexample > + > +For each integer value @code{i} from 0 to 3, do the following. The > integer > +value in word element @code{i} of a is multiplied by the integer > value in word > +element @code{i} of b. The high-order 32 bits of the 64-bit product > are placed > +into word element @code{i} of the vector returned. 
> + > +@smallexample > +@exdent vector signed long long > +@exdent vec_mulh (vector signed long long a, vector signed long long > b) > +@exdent vector unsigned long long > +@exdent vec_mulh (vector unsigned long long a, vector unsigned long > long b) > +@end smallexample > + > +For each integer value @code{i} from 0 to 1, do the following. The > integer > +value in doubleword element @code{i} of a is multiplied by the > integer value in > +doubleword element @code{i} of b. The high-order 64 bits of the 128- > bit product > +are placed into doubleword element @code{i} of the vector returned. > + > +@smallexample > +@exdent vector unsigned long long > +@exdent vec_mul (vector unsigned long long a, vector unsigned long > long b) > +@exdent vector signed long long > +@exdent vec_mul (vector signed long long a, vector signed long long > b) > +@end smallexample > + > +For each integer value @code{i} from 0 to 1, do the following. The > integer > +value in doubleword element @code{i} of a is multiplied by the > integer value in > +doubleword element @code{i} of b. The low-order 64 bits of the 128- > bit product > +are placed into doubleword element @code{i} of the vector returned. > + > +@smallexample > +@exdent vector signed int > +@exdent vec_div (vector signed int a, vector signed int b) > +@exdent vector unsigned int > +@exdent vec_div (vector unsigned int a, vector unsigned int b) > +@end smallexample > + > +For each integer value @code{i} from 0 to 3, do the following. The > integer in > +word element @code{i} of a is divided by the integer in word element > @code{i} > +of b. The unique integer quotient is placed into the word element > @code{i} of > +the vector returned. If an attempt is made to perform any of the > divisions > +<anything> ÷ 0 then the quotient is undefined. 
> + > +@smallexample > +@exdent vector signed long long > +@exdent vec_div (vector signed long long a, vector signed long long > b) > +@exdent vector unsigned long long > +@exdent vec_div (vector unsigned long long a, vector unsigned long > long b) > +@end smallexample > + > +For each integer value @code{i} from 0 to 1, do the following. The > integer in > +doubleword element @code{i} of a is divided by the integer in > doubleword > +element @code{i} of b. The unique integer quotient is placed into > the > +doubleword element @code{i} of the vector returned. If an attempt is > made to > +perform any of the divisions 0x8000_0000_0000_0000 ÷ -1 or > <anything> ÷ 0 then > +the quotient is undefined. > + > +@smallexample > +@exdent vector signed int > +@exdent vec_dive (vector signed int a, vector signed int b) > +@exdent vector unsigned int > +@exdent vec_dive (vector unsigned int a, vector unsigned int b) > +@end smallexample > + > +For each integer value @code{i} from 0 to 3, do the following. The > integer in > +word element @code{i} of a is shifted left by 32 bits, then divided > by the > +integer in word element @code{i} of b. The unique integer quotient > is placed > +into the word element @code{i} of the vector returned. If the > quotient cannot > +be represented in 32 bits, or if an attempt is made to perform any > of the > +divisions <anything> ÷ 0 then the quotient is undefined. > + > +@smallexample > +@exdent vector signed long long > +@exdent vec_dive (vector signed long long a, vector signed long long > b) > +@exdent vector unsigned long long > +@exdent vec_dive (vector unsigned long long a, vector unsigned long > long b) > +@end smallexample > + > +For each integer value @code{i} from 0 to 1, do the following. The > integer in > +doubleword element @code{i} of a is shifted left by 64 bits, then > divided by > +the integer in doubleword element @code{i} of b. 
The unique integer > quotient is > +placed into the doubleword element @code{i} of the vector returned. > If the > +quotient cannot be represented in 64 bits, or if an attempt is made > to perform > +<anything> ÷ 0 then the quotient is undefined. > + > +@smallexample > +@exdent vector signed int > +@exdent vec_mod (vector signed int a, vector signed int b) > +@exdent vector unsigned int > +@exdent vec_mod (vector unsigned int a, vector unsigned int b) > +@end smallexample > + > +For each integer value @code{i} from 0 to 3, do the following. The > integer in > +word element @code{i} of a is divided by the integer in word element > @code{i} > +of b. The unique integer remainder is placed into the word element > @code{i} of > +the vector returned. If an attempt is made to perform any of the > divisions > +0x8000_0000 ÷ -1 or <anything> ÷ 0 then the remainder is undefined. > + > +@smallexample > +@exdent vector signed long long > +@exdent vec_mod (vector signed long long a, vector signed long long > b) > +@exdent vector unsigned long long > +@exdent vec_mod (vector unsigned long long a, vector unsigned long > long b) > +@end smallexample > + > +For each integer value @code{i} from 0 to 1, do the following. The > integer in > +doubleword element @code{i} of a is divided by the integer in > doubleword > +element @code{i} of b. The unique integer remainder is placed into > the > +doubleword element @code{i} of the vector returned. If an attempt is > made to > +perform <anything> ÷ 0 then the remainder is undefined. > + > Generate PCV from specified Mask size, as if implemented by the > @code{xxgenpcvbm}, @code{xxgenpcvhm}, @code{xxgenpcvwm} > instructions, where > immediate value is either 0, 1, 2 or 3. 
> diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1-p10- > runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-1-p10- > runnable.c > new file mode 100644 > index 00000000000..222c8b3a409 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c > @@ -0,0 +1,398 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target power10_hw } */ > +/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */ > + > +/* { dg-final { scan-assembler-times {\mvdivsw\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvdivuw\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvdivsd\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvdivud\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvdivesw\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvdiveuw\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvdivesd\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvdiveud\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvmodsw\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvmoduw\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvmodsd\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvmodud\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvmulhsw\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvmulhuw\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvmulhsd\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvmulhud\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvmulld\M} 2 } } */ > + > +#include <stdint.h> > +#include <stdio.h> > +#include <math.h> > +#include <altivec.h> > + > +#define DEBUG 0 > + > +#ifdef DEBUG > +#include <stdio.h> > +#endif > + > +void abort (void); > + > +int main() > + { > + int i; > + vector int i_arg1, i_arg2; > + vector unsigned int u_arg1, u_arg2; > + vector long long int d_arg1, d_arg2; > + vector long long unsigned int ud_arg1, ud_arg2; > + > + vector int vec_i_expected, vec_i_result; > + vector unsigned int vec_u_expected, vec_u_result; > 
+ vector long long int vec_d_expected, vec_d_result; > + vector long long unsigned int vec_ud_expected, vec_ud_result; > + > + /* Signed word divide */ > + i_arg1 = (vector int){ 20, 40, 60, 80}; > + i_arg2 = (vector int){ 2, 2, 2, 2}; > + vec_i_expected = (vector int){10, 20, 30, 40}; > + > + vec_i_result = vec_div (i_arg1, i_arg2); > + > + for (i = 0; i < 4; i++) > + { > + if (vec_i_expected[i] != vec_i_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_div signed result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_i_result[i], i, vec_i_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Unsigned word divide */ > + u_arg1 = (vector unsigned int){ 20, 40, 60, 80}; > + u_arg2 = (vector unsigned int){ 2, 2, 2, 2}; > + vec_u_expected = (vector unsigned int){10, 20, 30, 40}; > + > + vec_u_result = vec_div (u_arg1, u_arg2); > + > + for (i = 0; i < 4; i++) > + { > + if (vec_u_expected[i] != vec_u_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_div unsigned result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_u_result[i], i, vec_u_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Signed double word divide */ > + d_arg1 = (vector long long){ 24, 68}; > + d_arg2 = (vector long long){ 2, 2}; > + vec_d_expected = (vector long long){12, 34}; > + > + vec_d_result = vec_div (d_arg1, d_arg2); > + > + for (i = 0; i < 2; i++) > + { > + if (vec_d_expected[i] != vec_d_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_div signed result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_d_result[i], i, vec_d_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Unsigned double word divide */ > + ud_arg1 = (vector unsigned long long){ 24, 68}; > + ud_arg2 = (vector unsigned long long){ 2, 2}; > + vec_ud_expected = (vector unsigned long long){12, 34}; > + > + vec_ud_result = vec_div (ud_arg1, ud_arg2); > + > + for (i = 0; i < 2; i++) > + { > + if (vec_ud_expected[i] != vec_ud_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_div unsigned 
result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_ud_result[i], i, vec_ud_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Divide Extended signed word result = (arg1 << 32)/arg2 */ > + i_arg1 = (vector int){ 2, 4, 6, 8}; > + i_arg2 = (vector int){ 2048, 2048, 2048, 2048}; > + vec_i_expected = (vector int){4194304, 8388608, 12582912, > 16777216}; > + > + vec_i_result = vec_dive (i_arg1, i_arg2); > + > + for (i = 0; i < 4; i++) > + { > + if (vec_i_expected[i] != vec_i_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_dive signed result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_i_result[i], i, vec_i_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Divide Extended unsigned word result = (arg1 << 32)/arg2 */ > + u_arg1 = (vector unsigned int){ 2, 4, 6, 8}; > + u_arg2 = (vector unsigned int){ 2048, 2048, 2048, 2048}; > + vec_u_expected = (vector unsigned int){4194304, 8388608, > + 12582912, 16777216}; > + > + vec_u_result = vec_dive (u_arg1, u_arg2); > + > + for (i = 0; i < 4; i++) > + { > + if (vec_u_expected[i] != vec_u_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_dive unsigned result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_u_result[i], i, vec_u_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Divide Extended double signed esult = (arg1 << 64)/arg2 */ > + d_arg1 = (vector long long int){ 2, 4}; > + d_arg2 = (vector long long int){ 4294967296, 4294967296}; > + > + vec_d_expected = (vector long long int){8589934592, > 17179869184}; > + > + vec_d_result = vec_dive (d_arg1, d_arg2); > + > + for (i = 0; i < 2; i++) > + { > + if (vec_d_expected[i] != vec_d_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_dive signed result[%d] = %lld != " > + "expected[%d] = %lld\n", > + i, vec_d_result[i], i, vec_d_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Divide Extended double unsigned result = (arg1 << 64)/arg2 */ > + ud_arg1 = (vector long long unsigned int){ 2, 4}; > + ud_arg2 = (vector 
long long unsigned int){ 4294967296, > 4294967296}; > + > + vec_ud_expected = (vector long long unsigned int){8589934592, > + 17179869184}; > + > + vec_ud_result = vec_dive (ud_arg1, ud_arg2); > + > + for (i = 0; i < 2; i++) > + { > + if (vec_ud_expected[i] != vec_ud_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_dive unsigned result[%d] = %lld != " > + "expected[%d] = %lld\n", > + i, vec_ud_result[i], i, vec_ud_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Signed word modulo */ > + i_arg1 = (vector int){ 23, 45, 61, 89}; > + i_arg2 = (vector int){ 2, 2, 2, 2}; > + vec_i_expected = (vector int){1, 1, 1, 1}; > + > + vec_i_result = vec_mod (i_arg1, i_arg2); > + > + for (i = 0; i < 4; i++) > + { > + if (vec_i_expected[i] != vec_i_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_mod signed result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_i_result[i], i, vec_i_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Unsigned word modulo */ > + u_arg1 = (vector unsigned int){ 25, 41, 67, 86}; > + u_arg2 = (vector unsigned int){ 3, 3, 3, 3}; > + vec_u_expected = (vector unsigned int){1, 2, 1, 2}; > + > + vec_u_result = vec_mod (u_arg1, u_arg2); > + > + for (i = 0; i < 4; i++) > + { > + if (vec_u_expected[i] != vec_u_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_mod unsigned result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_u_result[i], i, vec_u_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Signed double word modulo */ > + d_arg1 = (vector long long){ 24, 68}; > + d_arg2 = (vector long long){ 7, 7}; > + vec_d_expected = (vector long long){3, 5}; > + > + vec_d_result = vec_mod (d_arg1, d_arg2); > + > + for (i = 0; i < 2; i++) > + { > + if (vec_d_expected[i] != vec_d_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_mod signed result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_d_result[i], i, vec_d_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Unsigned double word modulo */ > + ud_arg1 = 
(vector unsigned long long){ 24, 68}; > + ud_arg2 = (vector unsigned long long){ 8, 8}; > + vec_ud_expected = (vector unsigned long long){0, 4}; > + > + vec_ud_result = vec_mod (ud_arg1, ud_arg2); > + > + for (i = 0; i < 2; i++) > + { > + if (vec_ud_expected[i] != vec_ud_result[i]) > +#ifdef DEBUG > + printf("ERROR vecmod unsigned result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_ud_result[i], i, vec_ud_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Signed word multiply high */ > + i_arg1 = (vector int){ 2147483648, 2147483648, 2147483648, > 2147483648 }; > + i_arg2 = (vector int){ 2, 3, 4, 5}; > + // vec_i_expected = (vector int){-1, -2, -2, -3}; > + vec_i_expected = (vector int){1, -2, -2, -3}; > + > + vec_i_result = vec_mulh (i_arg1, i_arg2); > + > + for (i = 0; i < 4; i++) > + { > + if (vec_i_expected[i] != vec_i_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_mulh signed result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_i_result[i], i, vec_i_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Unsigned word multiply high */ > + u_arg1 = (vector unsigned int){ 2147483648, 2147483648, > + 2147483648, 2147483648 }; > + u_arg2 = (vector unsigned int){ 4, 5, 6, 7 }; > + vec_u_expected = (vector unsigned int){2, 2, 3, 3 }; > + > + vec_u_result = vec_mulh (u_arg1, u_arg2); > + > + for (i = 0; i < 4; i++) > + { > + if (vec_u_expected[i] != vec_u_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_mulh unsigned result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_u_result[i], i, vec_u_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Signed double word multiply high */ > + d_arg1 = (vector long long int){ 2305843009213693951, > + 4611686018427387903 }; > + d_arg2 = (vector long long int){ 12, 20 }; > + vec_d_expected = (vector long long int){ 1, 4 }; > + > + vec_d_result = vec_mulh (d_arg1, d_arg2); > + > + for (i = 0; i < 2; i++) > + { > + if (vec_d_expected[i] != vec_d_result[i]) > +#ifdef DEBUG > + 
printf("ERROR vec_mulh signed result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_d_result[i], i, vec_d_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Unsigned double word multiply high */ > + ud_arg1 = (vector unsigned long long int){ 2305843009213693951, > + 4611686018427387903 }; > + ud_arg2 = (vector unsigned long long int){ 32, 10 }; > + vec_ud_expected = (vector unsigned long long int){ 3, 2 }; > + > + vec_ud_result = vec_mulh (ud_arg1, ud_arg2); > + > + for (i = 0; i < 2; i++) > + { > + if (vec_ud_expected[i] != vec_ud_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_mulh unsigned result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_ud_result[i], i, vec_ud_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Unsigned double word multiply low */ > + ud_arg1 = (vector unsigned long long int){ 2048, 4096 }; > + ud_arg2 = (vector unsigned long long int){ 2, 4 }; > + vec_ud_expected = (vector unsigned long long int){ 4096, 16384 > }; > + > + vec_ud_result = vec_mul (ud_arg1, ud_arg2); > + > + for (i = 0; i < 2; i++) > + { > + if (vec_ud_expected[i] != vec_ud_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_mul unsigned result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_ud_result[i], i, vec_ud_expected[i]); > +#else > + abort(); > +#endif > + } > + > + /* Signed double word multiply low */ > + d_arg1 = (vector signed long long int){ 2048, 4096 }; > + d_arg2 = (vector signed long long int){ 2, 4 }; > + vec_d_expected = (vector signed long long int){ 4096, 16384 }; > + > + vec_d_result = vec_mul (d_arg1, d_arg2); > + > + for (i = 0; i < 2; i++) > + { > + if (vec_d_expected[i] != vec_d_result[i]) > +#ifdef DEBUG > + printf("ERROR vec_mul signed result[%d] = %d != " > + "expected[%d] = %d\n", > + i, vec_d_result[i], i, vec_d_expected[i]); > +#else > + abort(); > +#endif > + } > + }