Hi! While the https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01087.html added *andndi3_doubleword, I don't understand how it can actually work. The problem is that this pattern is for combine, and needs combining of DImode NOT setter with DImode AND user. But there is no DImode pattern for one_cmpldi2, so it is lowered early into two one_cmplsi2 patterns and I can't see how combine would be able to figure stuff out from that.
This patch: 1) adds one_cmpldi2 pattern for stv purposes (which splits into two one_cmplsi2 after reload) 2) teaches the 32-bit stv pass to handle NOT (as xor all-ones) 3) renames the old *andndi3_doubleword to *andndi3_doubleword_bmi, as it is for -mbmi only, and adds another *andndi3_doubleword pattern that is meant to live just from combine till the stv pass, or worse case till following split1 pass when it is split back into not followed by and; this change makes it possible to use pandn in stv pass, even without -mbmi Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2016-12-02 Jakub Jelinek <ja...@redhat.com> PR target/70322 * config/i386/i386.c (dimode_scalar_to_vector_candidate_p): Handle NOT. (dimode_scalar_chain::compute_convert_gain): Likewise. (dimode_scalar_chain::convert_insn): Likewise. * config/i386/i386.md (*andndi3_doubleword): New define_insn_and_split. Rename the old define_insn_and_split to... (*andndi3_doubleword_bmi): ... this. (one_cmpl<mode>2): Use SWIM1248x iterator instead of SWIM. (*one_cmpldi2_doubleword): New define_insn_and_split. * gcc.target/i386/pr70322-1.c: New test. * gcc.target/i386/pr70322-2.c: New test. * gcc.target/i386/pr70322-3.c: New test. --- gcc/config/i386/i386.c.jj 2016-12-02 11:17:40.702995111 +0100 +++ gcc/config/i386/i386.c 2016-12-02 12:01:31.656469089 +0100 @@ -2826,6 +2826,9 @@ dimode_scalar_to_vector_candidate_p (rtx return false; break; + case NOT: + break; + case REG: return true; @@ -2848,7 +2851,8 @@ dimode_scalar_to_vector_candidate_p (rtx if ((GET_MODE (XEXP (src, 0)) != DImode && !CONST_INT_P (XEXP (src, 0))) - || (GET_MODE (XEXP (src, 1)) != DImode + || (GET_CODE (src) != NOT + && GET_MODE (XEXP (src, 1)) != DImode && !CONST_INT_P (XEXP (src, 1)))) return false; @@ -3415,6 +3419,8 @@ dimode_scalar_chain::compute_convert_gai if (CONST_INT_P (XEXP (src, 1))) gain -= vector_const_cost (XEXP (src, 1)); } + else if (GET_CODE (src) == NOT) + gain += ix86_cost->add - COSTS_N_INSNS (1); else if (GET_CODE (src) == COMPARE) { /* Assume comparison cost is the same. */ @@ -3770,6 +3776,14 @@ dimode_scalar_chain::convert_insn (rtx_i PUT_MODE (src, V2DImode); break; + case NOT: + src = XEXP (src, 0); + convert_op (&src, insn); + subreg = gen_reg_rtx (V2DImode); + emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn); + src = gen_rtx_XOR (V2DImode, src, subreg); + break; + case MEM: if (!REG_P (dst)) convert_op (&src, insn); --- gcc/config/i386/i386.md.jj 2016-12-01 23:24:51.663157486 +0100 +++ gcc/config/i386/i386.md 2016-12-02 12:50:27.616829191 +0100 @@ -8534,7 +8534,7 @@ (define_split operands[2] = gen_lowpart (QImode, operands[2]); }) -(define_insn_and_split "*andndi3_doubleword" +(define_insn_and_split "*andndi3_doubleword_bmi" [(set (match_operand:DI 0 "register_operand" "=r") (and:DI (not:DI (match_operand:DI 1 "register_operand" "r")) @@ -8551,6 +8551,27 @@ (define_insn_and_split "*andndi3_doublew (clobber (reg:CC FLAGS_REG))])] "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);") +;; This insn is there only so that the stv pass can use pandn even when +;; BMI is not available. It should be matched during combine and split +;; again in split1, unless the stv pass converts it. +(define_insn_and_split "*andndi3_doubleword" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI + (not:DI (match_operand:DI 1 "register_operand" "r")) + (match_operand:DI 2 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "!TARGET_BMI && !TARGET_64BIT && TARGET_STV && TARGET_SSE2 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 3) + (not:DI (match_dup 1))) + (parallel [(set (match_dup 0) + (and:DI (match_dup 3) + (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] + "operands[3] = gen_reg_rtx (DImode);") + (define_insn "*andn<mode>_1" [(set (match_operand:SWI48 0 "register_operand" "=r,r") (and:SWI48 @@ -9313,8 +9334,8 @@ (define_split ;; One complement instructions (define_expand "one_cmpl<mode>2" - [(set (match_operand:SWIM 0 "nonimmediate_operand") - (not:SWIM (match_operand:SWIM 1 "nonimmediate_operand")))] + [(set (match_operand:SWIM1248x 0 "nonimmediate_operand") + (not:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand")))] "" "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;") @@ -9403,6 +9424,19 @@ (define_split (const_int 0)])) (set (match_dup 1) (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])]) + +(define_insn_and_split "*one_cmpldi2_doubleword" + [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") + (not:DI (match_operand:DI 1 "nonimmediate_operand" "0")))] + "!TARGET_64BIT && TARGET_STV && TARGET_SSE2 + && ix86_unary_operator_ok (NOT, DImode, operands)" + "#" + "&& reload_completed" + [(set (match_dup 0) + (not:SI (match_dup 1))) + (set (match_dup 2) + (not:SI (match_dup 3)))] + "split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);") ;; Shift instructions --- gcc/testsuite/gcc.target/i386/pr70322-1.c.jj 2016-12-02 12:52:47.193051745 +0100 +++ gcc/testsuite/gcc.target/i386/pr70322-1.c 2016-12-02 12:52:24.708338078 +0100 @@ -0,0 +1,12 @@ +/* PR target/70322 */ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -msse2 -mstv -mbmi" } */ +/* { dg-final { scan-assembler "pandn" } } */ + +extern long long z; + +void +foo (long long x, long long y) +{ + z = ~x & y; +} --- gcc/testsuite/gcc.target/i386/pr70322-2.c.jj 2016-12-02 12:52:50.165013898 +0100 +++ gcc/testsuite/gcc.target/i386/pr70322-2.c 2016-12-02 12:52:39.302152232 +0100 @@ -0,0 +1,12 @@ +/* PR target/70322 */ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -msse2 -mstv -mno-bmi" } */ +/* { dg-final { scan-assembler "pandn" } } */ + +extern long long z; + +void +foo (long long x, long long y) +{ + z = ~x & y; +} --- gcc/testsuite/gcc.target/i386/pr70322-3.c.jj 2016-12-02 13:07:27.658796578 +0100 +++ gcc/testsuite/gcc.target/i386/pr70322-3.c 2016-12-02 13:08:11.899229225 +0100 @@ -0,0 +1,13 @@ +/* PR target/70322 */ +/* { dg-do compile { target ia32 } } */ +/* { dg-options "-O2 -msse2 -mstv" } */ +/* { dg-final { scan-assembler "pxor" } } */ +/* { dg-final { scan-assembler "por" } } */ + +extern long long z; + +void +foo (long long x, long long y) +{ + z = ~x | y; +} Jakub