Hi,
The attached patch adds another parameter machine_mode op_mode to vec_perm_const
hook to specify mode of input operands. The motivation for doing this
is PR96463,
where we create vec_perm_expr of the form:
lhs = vec_perm_expr<rhs, mask>
where lhs and rhs have different vector types but same element type
(lhs is SVE and rhs is corresponding advsimd vector).
It seems the following targets were affected: aarch64, i386, arm, ia64,
mips, rs6000, s390, sparc, gcn.
Bootstrapped+tested on x86_64-linux-gnu, aarch64-linux-gnu.
For other targets, I did make all-gcc stage1, which seems to build OK.
Thanks,
Prathamesh
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c5006afc00d..31ff6ef3f92 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6088,7 +6088,7 @@ for the given scalar type @var{type}. @var{is_packed} is
false if the scalar
access using @var{type} is known to be naturally aligned.
@end deftypefn
-@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST (machine_mode
@var{mode}, rtx @var{output}, rtx @var{in0}, rtx @var{in1}, const
vec_perm_indices @var{&sel})
+@deftypefn {Target Hook} bool TARGET_VECTORIZE_VEC_PERM_CONST (machine_mode
@var{mode}, machine_mode @var{op_mode}, rtx @var{output}, rtx @var{in0}, rtx
@var{in1}, const vec_perm_indices @var{&sel})
This hook is used to test whether the target can permute up to two
vectors of mode @var{mode} using the permutation vector @code{sel}, and
also to emit such a permutation. In the former case @var{in0}, @var{in1}
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 68dc679cc6a..aef9d4c5d28 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -417,8 +417,8 @@ can_vec_perm_var_p (machine_mode mode)
with here. */
bool
-can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel,
- bool allow_variable_p)
+can_vec_perm_const_p (machine_mode mode, machine_mode op_mode,
+ const vec_perm_indices &sel, bool allow_variable_p)
{
/* If the target doesn't implement a vector mode for the vector type,
then no operations are supported. */
@@ -448,7 +448,7 @@ can_vec_perm_const_p (machine_mode mode, const
vec_perm_indices &sel,
if (targetm.vectorize.vec_perm_const != NULL)
{
- if (targetm.vectorize.vec_perm_const (mode, NULL_RTX, NULL_RTX,
+ if (targetm.vectorize.vec_perm_const (mode, op_mode, NULL_RTX, NULL_RTX,
NULL_RTX, sel))
return true;
@@ -462,6 +462,13 @@ can_vec_perm_const_p (machine_mode mode, const
vec_perm_indices &sel,
return false;
}
+bool
+can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel,
+ bool allow_variable_p)
+{
+ return can_vec_perm_const_p (mode, mode, sel, allow_variable_p);
+}
+
/* Find a widening optab even if it doesn't widen as much as we want.
E.g. if from_mode is HImode, and to_mode is DImode, and there is no
direct HI->SI insn, then return SI->DI, if that exists. */
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index b9c9fd6f64d..fa22c2210d8 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -178,6 +178,8 @@ bool can_conditionally_move_p (machine_mode mode);
opt_machine_mode qimode_for_vec_perm (machine_mode);
bool selector_fits_mode_p (machine_mode, const vec_perm_indices &);
bool can_vec_perm_var_p (machine_mode);
+bool can_vec_perm_const_p (machine_mode, machine_mode, const vec_perm_indices
&,
+ bool = true);
bool can_vec_perm_const_p (machine_mode, const vec_perm_indices &,
bool = true);
/* Find a widening optab even if it doesn't widen as much as we want. */
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 3d8fa3abdfe..55f10c41789 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -6250,7 +6250,9 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
if (single_arg_p)
v1 = v0;
- if (targetm.vectorize.vec_perm_const (mode, target, v0, v1, indices))
+ gcc_checking_assert (GET_MODE (v0) == GET_MODE (v1));
+ machine_mode op_mode = GET_MODE (v0);
+ if (targetm.vectorize.vec_perm_const (mode, op_mode, target, v0, v1,
indices))
return target;
}
@@ -6264,7 +6266,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
v0_qi = gen_lowpart (qimode, v0);
v1_qi = gen_lowpart (qimode, v1);
if (targetm.vectorize.vec_perm_const != NULL
- && targetm.vectorize.vec_perm_const (qimode, target_qi, v0_qi,
+ && targetm.vectorize.vec_perm_const (qimode, qimode, target_qi, v0_qi,
v1_qi, qimode_indices))
return gen_lowpart (mode, target_qi);
}
diff --git a/gcc/target.def b/gcc/target.def
index d85adf36a39..2713c31dc3f 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1894,7 +1894,7 @@ try the equivalent byte operation. If that also fails,
it will try forcing\n\
the selector into a register and using the @var{vec_perm@var{mode}}\n\
instruction pattern. There is no need for the hook to handle these two\n\
implementation approaches itself.",
- bool, (machine_mode mode, rtx output, rtx in0, rtx in1,
+ bool, (machine_mode mode, machine_mode op_mode, rtx output, rtx in0, rtx in1,
const vec_perm_indices &sel),
NULL)