https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117542
--- Comment #3 from Hongtao Liu <liuhongt at gcc dot gnu.org> ---
(In reply to Hongtao Liu from comment #2)
> (In reply to Richard Biener from comment #1)
> > It doesn't even unambiguously specify whether the mode is that of the source
> > or the destination. The original idea was of course that the size
> > unambiguously specifies the destination mode and thus specifying it would be
> > redundant. Making
> > all those optabs conversion optabs has some overhead and is useless in 99%
> > of
> > the cases.
> >
> > Can you combine both destination mode variants in vec_pack_trunc_VnSF and
> > use predicates to select?
> Then the mode of operand[0] will be hidden in the predicates; I suspect it
> would fail the check below in supportable_narrowing_operation
> 4739 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
>
diff --git a/gcc/expr.cc b/gcc/expr.cc
index aa6ee85e719..f935d0e7767 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -10900,6 +10900,30 @@ expand_expr_real_2 (const_sepops ops, rtx target,
machine_mode tmode,
expand_insn (icode, 4, eops);
return eops[0].value;
}
+ /* There are two kinds of half-precision floating point, and
+ vec_pack_trunc_m can't be overloaded. Making all those optabs conversion
+ optabs has some overhead and is useless in 99% of the cases. So the mode
+ can be hidden in the predicate, and the mode of the type is the real tmode. */
+ if (VECTOR_FLOAT_TYPE_P (type)
+ && VECTOR_FLOAT_TYPE_P (TREE_TYPE (treeop0))
+ && GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (type))) == 2
+ && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (treeop0)) * 2,
+ TYPE_VECTOR_SUBPARTS (type))
+ && tmode == E_VOIDmode)
+ {
+ mode = TYPE_MODE (TREE_TYPE (treeop0));
+ tmode = TYPE_MODE (type);
+ class expand_operand eops[3];
+ expand_operands (treeop0, treeop1,
+ subtarget, &op0, &op1, EXPAND_NORMAL);
+ this_optab = vec_pack_trunc_optab;
+ enum insn_code icode = optab_handler (this_optab, mode);
+ create_output_operand (&eops[0], target, tmode);
+ create_input_operand (&eops[1], op0, mode);
+ create_input_operand (&eops[2], op1, mode);
+ expand_insn (icode, 3, eops);
+ return eops[0].value;
+ }
mode = TYPE_MODE (TREE_TYPE (treeop0));
subtarget = NULL_RTX;
goto binop;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 7a92da00f7d..d4744063045 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -15010,6 +15010,24 @@ supportable_narrowing_operation (code_helper code,
return true;
}
+ /* There are two kinds of half-precision floating point, and vec_pack_trunc_m
+ can't be overloaded. Making all those optabs conversion optabs has
+ some overhead and is useless in 99% of the cases.
+ So check the predicate here. */
+ if (c1 == VEC_PACK_TRUNC_EXPR
+ && VECTOR_FLOAT_TYPE_P (narrow_vectype)
+ && VECTOR_FLOAT_TYPE_P (vectype)
+ && GET_MODE_SIZE (GET_MODE_INNER (TYPE_MODE (narrow_vectype))) == 2
+ && known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
+ TYPE_VECTOR_SUBPARTS (narrow_vectype))
+ && insn_data[icode1].operand[0].predicate)
+ {
+ machine_mode dpmode = insn_data[icode1].operand[0].mode;
+ machine_mode dmode = TYPE_MODE (narrow_vectype);
+ if (insn_data[icode1].operand[0].predicate (gen_reg_rtx (dmode),
dpmode))
+ return true;
+ }
+