With the even/odd v4hi multiply pattern names, we'll automatically generate the same code for dot_prodv4hi. The dot_prodv8qi pattern was actively incorrect wrt output mode.
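
For reference (not part of the patch): the kind of reduction loop this affects is the classic short-by-short dot product accumulated in int, which the vectorizer represents as DOT_PROD_EXPR and, with the even/odd widening multiply patterns named below, the middle-end can open-code from widening multiplies plus vector adds.  The function below is only an illustrative sketch; the name and signature are made up.

  /* Illustrative example only, not a testcase from this patch.
     A signed 16x16->32 dot product reduction that the vectorizer
     recognizes as DOT_PROD_EXPR.  */
  int
  dot_s16 (const short *a, const short *b, int n)
  {
    int sum = 0;
    for (int i = 0; i < n; ++i)
      sum += a[i] * b[i];
    return sum;
  }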
	* config/ia64/vect.md (smulv4hi3_highpart): New.
	(umulv4hi3_highpart): New.
	(vec_widen_smult_even_v4hi): Rename from pmpy2_even.
	(vec_widen_smult_odd_v4hi): Rename from pmpy2_odd.
	(udot_prodv8qi, sdot_prodv8qi): Remove.
	(sdot_prodv4hi, udot_prodv4hi): Remove.
	* config/ia64/ia64.c (ia64_expand_dot_prod_v8qi): Remove.
	* config/ia64/ia64-protos.h: Update.
---
 gcc/ChangeLog                 |  9 ++++
 gcc/config/ia64/ia64-protos.h |  1 -
 gcc/config/ia64/ia64.c        | 40 ------------------
 gcc/config/ia64/vect.md       | 88 +++++++++++-----------------------------
 4 files changed, 33 insertions(+), 105 deletions(-)

diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h
index 458b120..39d34d4 100644
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -43,7 +43,6 @@ extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
 extern void ia64_unpack_assemble (rtx, rtx, rtx, bool);
 extern void ia64_expand_unpack (rtx [], bool, bool);
 extern void ia64_expand_widen_sum (rtx[], bool);
-extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
 extern void ia64_expand_call (rtx, rtx, rtx, int);
 extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);
 extern void ia64_reload_gp (void);
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index a138ab4..c7fb559 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -2096,46 +2096,6 @@ ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
   emit_move_insn (operands[0], t);
 }
 
-/* Emit a signed or unsigned V8QI dot product operation.  */
-
-void
-ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
-{
-  rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
-  rtx p1, p2, p3, p4, s1, s2, s3;
-
-  op1 = operands[1];
-  op2 = operands[2];
-  sn1 = ia64_unpack_sign (op1, unsignedp);
-  sn2 = ia64_unpack_sign (op2, unsignedp);
-
-  l1 = gen_reg_rtx (V4HImode);
-  l2 = gen_reg_rtx (V4HImode);
-  h1 = gen_reg_rtx (V4HImode);
-  h2 = gen_reg_rtx (V4HImode);
-  ia64_unpack_assemble (l1, op1, sn1, false);
-  ia64_unpack_assemble (l2, op2, sn2, false);
-  ia64_unpack_assemble (h1, op1, sn1, true);
-  ia64_unpack_assemble (h2, op2, sn2, true);
-
-  p1 = gen_reg_rtx (V2SImode);
-  p2 = gen_reg_rtx (V2SImode);
-  p3 = gen_reg_rtx (V2SImode);
-  p4 = gen_reg_rtx (V2SImode);
-  emit_insn (gen_pmpy2_even (p1, l1, l2));
-  emit_insn (gen_pmpy2_even (p2, h1, h2));
-  emit_insn (gen_pmpy2_odd (p3, l1, l2));
-  emit_insn (gen_pmpy2_odd (p4, h1, h2));
-
-  s1 = gen_reg_rtx (V2SImode);
-  s2 = gen_reg_rtx (V2SImode);
-  s3 = gen_reg_rtx (V2SImode);
-  emit_insn (gen_addv2si3 (s1, p1, p2));
-  emit_insn (gen_addv2si3 (s2, p3, p4));
-  emit_insn (gen_addv2si3 (s3, s1, operands[3]));
-  emit_insn (gen_addv2si3 (operands[0], s2, s3));
-}
-
 /* Emit the appropriate sequence for a call.  */
 
 void
diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md
index aa77b01..4d85aa0 100644
--- a/gcc/config/ia64/vect.md
+++ b/gcc/config/ia64/vect.md
@@ -278,7 +278,29 @@
   "pmpyshr2.u %0 = %1, %2, %3"
   [(set_attr "itanium_class" "mmmul")])
 
-(define_insn "pmpy2_even"
+(define_expand "smulv4hi3_highpart"
+  [(match_operand:V4HI 0 "gr_register_operand")
+   (match_operand:V4HI 1 "gr_register_operand")
+   (match_operand:V4HI 2 "gr_register_operand")]
+  ""
+{
+  emit_insn (gen_pmpyshr2 (operands[0], operands[1],
+			   operands[2], GEN_INT (16)));
+  DONE;
+})
+
+(define_expand "umulv4hi3_highpart"
+  [(match_operand:V4HI 0 "gr_register_operand")
+   (match_operand:V4HI 1 "gr_register_operand")
+   (match_operand:V4HI 2 "gr_register_operand")]
+  ""
+{
+  emit_insn (gen_pmpyshr2_u (operands[0], operands[1],
+			     operands[2], GEN_INT (16)));
+  DONE;
+})
+
+(define_insn "vec_widen_smult_even_v4hi"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(mult:V2SI
 	  (vec_select:V2SI
@@ -299,7 +321,7 @@
 }
   [(set_attr "itanium_class" "mmshf")])
 
-(define_insn "pmpy2_odd"
+(define_insn "vec_widen_smult_odd_v4hi"
   [(set (match_operand:V2SI 0 "gr_register_operand" "=r")
 	(mult:V2SI
 	  (vec_select:V2SI
@@ -602,68 +624,6 @@
   DONE;
 })
 
-(define_expand "udot_prodv8qi"
-  [(match_operand:V2SI 0 "gr_register_operand" "")
-   (match_operand:V8QI 1 "gr_register_operand" "")
-   (match_operand:V8QI 2 "gr_register_operand" "")
-   (match_operand:V2SI 3 "gr_register_operand" "")]
-  ""
-{
-  ia64_expand_dot_prod_v8qi (operands, true);
-  DONE;
-})
-
-(define_expand "sdot_prodv8qi"
-  [(match_operand:V2SI 0 "gr_register_operand" "")
-   (match_operand:V8QI 1 "gr_register_operand" "")
-   (match_operand:V8QI 2 "gr_register_operand" "")
-   (match_operand:V2SI 3 "gr_register_operand" "")]
-  ""
-{
-  ia64_expand_dot_prod_v8qi (operands, false);
-  DONE;
-})
-
-(define_expand "sdot_prodv4hi"
-  [(match_operand:V2SI 0 "gr_register_operand" "")
-   (match_operand:V4HI 1 "gr_register_operand" "")
-   (match_operand:V4HI 2 "gr_register_operand" "")
-   (match_operand:V2SI 3 "gr_register_operand" "")]
-  ""
-{
-  rtx e, o, t;
-
-  e = gen_reg_rtx (V2SImode);
-  o = gen_reg_rtx (V2SImode);
-  t = gen_reg_rtx (V2SImode);
-
-  emit_insn (gen_pmpy2_even (e, operands[1], operands[2]));
-  emit_insn (gen_pmpy2_odd (o, operands[1], operands[2]));
-  emit_insn (gen_addv2si3 (t, e, operands[3]));
-  emit_insn (gen_addv2si3 (operands[0], t, o));
-  DONE;
-})
-
-(define_expand "udot_prodv4hi"
-  [(match_operand:V2SI 0 "gr_register_operand" "")
-   (match_operand:V4HI 1 "gr_register_operand" "")
-   (match_operand:V4HI 2 "gr_register_operand" "")
-   (match_operand:V2SI 3 "gr_register_operand" "")]
-  ""
-{
-  rtx l, h, t;
-
-  l = gen_reg_rtx (V2SImode);
-  h = gen_reg_rtx (V2SImode);
-  t = gen_reg_rtx (V2SImode);
-
-  emit_insn (gen_vec_widen_umult_lo_v4hi (l, operands[1], operands[2]));
-  emit_insn (gen_vec_widen_umult_hi_v4hi (h, operands[1], operands[2]));
-  emit_insn (gen_addv2si3 (t, l, operands[3]));
-  emit_insn (gen_addv2si3 (operands[0], t, h));
-  DONE;
-})
-
 (define_expand "vcond<mode><mode>"
   [(set (match_operand:VECINT 0 "gr_register_operand" "")
 	(if_then_else:VECINT