Hi, I had the problem with passing information about single variable
from expand_vec_cond_expr optab into ix86_expand_*_vcond.
I looked into it this problem for quite a while and found a solution.
Now the question if it could be done better.
First of all the problem:
If we represent any vector comparison with VEC_COND_EXPR < v0 <OP> v1
? {-1,...} : {0,...} >, then in the assembler we do not want to see
this useless comparison with {-1...}.
Now it is easy to fix the problem about excessive masking. The real
challenge starts when the comparison inside vcond is expressed as a
variable. In that case in order to construct correct vector expression
we need to adjust cond in cond ? v0 : v1 to cond == {-1...} or as we
agreed recently cond != {0,..}. But hat we need to do only to make
vec_cond_expr happy. On the level of assembler we don't want this
condition.
Now, if I just construct the tree, then in x86, rtx_equal_p, does not
know that this is a constant vector full of -1, because the comparison
operands are not immediate. So I need somehow to mark the fact in
optabs, and then check the information in the x86.
At the moment I do something like this:
optabs:
if (!COMPARISON_CLASS_P (op0))
ops[3] = gen_rtx_EQ (mode, NULL_RTX, NULL_RTX);
This expression is preserved while checking and verifying.
ix86:
if (GET_CODE (comp) == EQ && XEXP (comp, 0) == NULL_RTX
&& XEXP (comp, 1) == NULL_RTX)
See the patch attached for more details. The patch is just to give you
an idea of the way I am doing it and it seems to work. Please don't
criticise the patch itself, better help me to understand if there is a
better way to pass the information from optabs to ix86.
Thanks,
Artem.
On Thu, Aug 18, 2011 at 3:31 PM, Richard Henderson <[email protected]> wrote:
> On 08/18/2011 02:23 AM, Richard Guenther wrote:
>>>> >> The first one (inefficient) is vec0 > vec1 ? {-1,...} : {0,...}
>>>> >> The second is vec0 > vec1. expand_vec_cond_expr is stupid, which is
>>>> >> fine, but it means that we need to construct it carefully.
>>> >
>>> > This is still important.
>> Yes. I think the backends need to handle optimizing this case,
>> esp. considering targets that do not have instructions to produce
>> a {-1,...}/{0,...} bitmask from a comparison but produce a vector
>> of condition codes. With using vec0 > vec1 ? {-1...} : {0,...} for
>> mask = vec0 > vec1; we avoid exposing the result kind of
>> vector comparisons.
>>
>> It should be easily possible for x86 for example to recognize
>> the -1 : 0 case.
>>
>
> I think you've been glossing over the hard part with "..." up there.
> I challenge you to actually fill that in with something meaningful
> in rtl.
>
> I suspect that you simply have to add another named pattern that
> will Do What You Want on mips and suchlike that produce a CCmode.
>
>
>
> r~
>
Index: gcc/optabs.c
===================================================================
--- gcc/optabs.c (revision 177665)
+++ gcc/optabs.c (working copy)
@@ -6557,6 +6557,8 @@ expand_vec_cond_expr_p (tree type, enum
/* Generate insns for a VEC_COND_EXPR, given its TYPE and its
three operands. */
+rtx rtx_build_vector_from_val (enum machine_mode, HOST_WIDE_INT);
+rtx gen_const_vector1 (enum machine_mode, int);
rtx
expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
@@ -6572,16 +6574,39 @@ expand_vec_cond_expr (tree vec_cond_type
if (icode == CODE_FOR_nothing)
return 0;
- comparison = vector_compare_rtx (op0, unsignedp, icode);
rtx_op1 = expand_normal (op1);
rtx_op2 = expand_normal (op2);
+
+ if (COMPARISON_CLASS_P (op0))
+ {
+ comparison = vector_compare_rtx (op0, unsignedp, icode);
+ create_output_operand (&ops[0], target, mode);
+ create_input_operand (&ops[1], rtx_op1, mode);
+ create_input_operand (&ops[2], rtx_op2, mode);
+ create_fixed_operand (&ops[3], comparison);
+ create_fixed_operand (&ops[4], XEXP (comparison, 0));
+ create_fixed_operand (&ops[5], XEXP (comparison, 1));
+
+ }
+ else
+ {
+ enum rtx_code rcode;
+ rtx rtx_op0;
+ rtx vec;
+
+ rtx_op0 = expand_normal (op0);
+ rcode = get_rtx_code (EQ_EXPR, unsignedp);
+ comparison = gen_rtx_EQ (mode, NULL_RTX, NULL_RTX);
+ vec = rtx_build_vector_from_val (mode, -1);
+
+ create_output_operand (&ops[0], target, mode);
+ create_input_operand (&ops[1], rtx_op1, mode);
+ create_input_operand (&ops[2], rtx_op2, mode);
+ create_input_operand (&ops[3], comparison, mode);
+ create_input_operand (&ops[4], rtx_op0, mode);
+ create_input_operand (&ops[5], vec, mode);
+ }
- create_output_operand (&ops[0], target, mode);
- create_input_operand (&ops[1], rtx_op1, mode);
- create_input_operand (&ops[2], rtx_op2, mode);
- create_fixed_operand (&ops[3], comparison);
- create_fixed_operand (&ops[4], XEXP (comparison, 0));
- create_fixed_operand (&ops[5], XEXP (comparison, 1));
expand_insn (icode, 6, ops);
return ops[0].value;
}
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c (revision 177665)
+++ gcc/config/i386/i386.c (working copy)
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3.
#include "tm.h"
#include "rtl.h"
#include "tree.h"
+#include "tree-flow.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
@@ -18402,6 +18403,23 @@ ix86_expand_sse_fp_minmax (rtx dest, enu
return true;
}
+/* Returns a vector of mode MODE where all the elements are ARG. */
+rtx
+rtx_build_vector_from_val (enum machine_mode mode, HOST_WIDE_INT arg)
+{
+ rtvec v;
+ int units, i;
+ enum machine_mode inner;
+
+ units = GET_MODE_NUNITS (mode);
+ inner = GET_MODE_INNER (mode);
+ v = rtvec_alloc (units);
+ for (i = 0; i < units; ++i)
+ RTVEC_ELT (v, i) = gen_rtx_CONST_INT (inner, arg);
+
+ return gen_rtx_raw_CONST_VECTOR (mode, v);
+}
+
/* Expand an sse vector comparison. Return the register with the result. */
static rtx
@@ -18411,18 +18429,28 @@ ix86_expand_sse_cmp (rtx dest, enum rtx_
enum machine_mode mode = GET_MODE (dest);
rtx x;
- cmp_op0 = force_reg (mode, cmp_op0);
- if (!nonimmediate_operand (cmp_op1, mode))
- cmp_op1 = force_reg (mode, cmp_op1);
+ /* Avoid useless comparison. */
+ if (code == EQ
+ && rtx_equal_p (cmp_op1, rtx_build_vector_from_val (mode, -1)))
+ {
+ cmp_op0 = force_reg (mode, cmp_op0);
+ x = cmp_op0;
+ }
+ else
+ {
+ cmp_op0 = force_reg (mode, cmp_op0);
+ if (!nonimmediate_operand (cmp_op1, mode))
+ cmp_op1 = force_reg (mode, cmp_op1);
+
+ x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
+ }
if (optimize
|| reg_overlap_mentioned_p (dest, op_true)
|| reg_overlap_mentioned_p (dest, op_false))
dest = gen_reg_rtx (mode);
- x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-
return dest;
}
@@ -18434,8 +18462,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp
{
enum machine_mode mode = GET_MODE (dest);
rtx t2, t3, x;
-
- if (op_false == CONST0_RTX (mode))
+ rtx mask_true;
+
+ if (rtx_equal_p (op_true, rtx_build_vector_from_val (mode, -1))
+ && rtx_equal_p (op_false, CONST0_RTX (mode)))
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
+ }
+ else if (op_false == CONST0_RTX (mode))
{
op_true = force_reg (mode, op_true);
x = gen_rtx_AND (mode, cmp, op_true);
@@ -18569,7 +18603,9 @@ ix86_expand_int_vcond (rtx operands[])
enum rtx_code code = GET_CODE (operands[3]);
bool negate = false;
rtx x, cop0, cop1;
+ rtx comp;
+ comp = operands[3];
cop0 = operands[4];
cop1 = operands[5];
@@ -18681,8 +18717,18 @@ ix86_expand_int_vcond (rtx operands[])
}
}
- x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
- operands[1+negate], operands[2-negate]);
+ if (GET_CODE (comp) == EQ && XEXP (comp, 0) == NULL_RTX
+ && XEXP (comp, 1) == NULL_RTX)
+ {
+ rtx vec = rtx_build_vector_from_val (mode, -1);
+ x = ix86_expand_sse_cmp (operands[0], code, cop0, vec,
+ operands[1+negate], operands[2-negate]);
+ }
+ else
+ {
+ x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
+ operands[1+negate], operands[2-negate]);
+ }
ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
operands[2-negate]);