On Thu, Oct 12, 2017 at 9:11 PM, Jakub Jelinek <ja...@redhat.com> wrote:
> On Thu, Oct 12, 2017 at 10:40:22AM +0200, Uros Bizjak wrote:
>> > So, if you aren't against it, I can extend the patch to handle the 4
>> > other mask patterns; as for other modes, SImode is what is being handled
>> > already, DImode is not a problem, because the FEs truncate the shift counts
>> > to integer_type_node already, and for HImode I haven't seen problem
>> > probably because most tunings avoid HImode math and so it isn't worth
>> > optimizing.
>>
>> OK, I think that we can live with 4 new patterns. Since these are all
>> written in the same way (as in the patch you posted), the amended
>> patch is pre-approved for mainline.
>
> Thanks, here is what I've committed to trunk after another bootstrap/regtest
> on x86_64-linux and i686-linux:
>
> 2017-10-12  Jakub Jelinek  <ja...@redhat.com>
>
>         PR target/82498
>         * config/i386/i386.md (*ashl<mode>3_mask_1,
>         *<shift_insn><mode>3_mask_1, *<rotate_insn><mode>3_mask_1,
>         *<btsc><mode>_mask_1, *btr<mode>_mask_1): New define_insn_and_split
>         patterns.
>
>         * gcc.target/i386/pr82498-1.c: New test.
>         * gcc.target/i386/pr82498-2.c: New test.

OK for mainline.

Thanks,
Uros.

> --- gcc/config/i386/i386.md.jj  2017-10-11 22:37:55.933863355 +0200
> +++ gcc/config/i386/i386.md     2017-10-12 11:30:38.191535974 +0200
> @@ -10228,6 +10228,26 @@ (define_insn_and_split "*ashl<mode>3_mas
>        (clobber (reg:CC FLAGS_REG))])]
>    "operands[2] = gen_lowpart (QImode, operands[2]);")
>
> +(define_insn_and_split "*ashl<mode>3_mask_1"
> +  [(set (match_operand:SWI48 0 "nonimmediate_operand")
> +       (ashift:SWI48
> +         (match_operand:SWI48 1 "nonimmediate_operand")
> +         (and:QI
> +           (match_operand:QI 2 "register_operand")
> +           (match_operand:QI 3 "const_int_operand"))))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
> +   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
> +      == GET_MODE_BITSIZE (<MODE>mode)-1
> +   && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(parallel
> +     [(set (match_dup 0)
> +          (ashift:SWI48 (match_dup 1)
> +                        (match_dup 2)))
> +      (clobber (reg:CC FLAGS_REG))])])
> +
>  (define_insn "*bmi2_ashl<mode>3_1"
>    [(set (match_operand:SWI48 0 "register_operand" "=r")
>         (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
> @@ -10728,6 +10748,26 @@ (define_insn_and_split "*<shift_insn><mo
>        (clobber (reg:CC FLAGS_REG))])]
>    "operands[2] = gen_lowpart (QImode, operands[2]);")
>
> +(define_insn_and_split "*<shift_insn><mode>3_mask_1"
> +  [(set (match_operand:SWI48 0 "nonimmediate_operand")
> +       (any_shiftrt:SWI48
> +         (match_operand:SWI48 1 "nonimmediate_operand")
> +         (and:QI
> +           (match_operand:QI 2 "register_operand")
> +           (match_operand:QI 3 "const_int_operand"))))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
> +   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
> +      == GET_MODE_BITSIZE (<MODE>mode)-1
> +   && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(parallel
> +     [(set (match_dup 0)
> +          (any_shiftrt:SWI48 (match_dup 1)
> +                             (match_dup 2)))
> +      (clobber (reg:CC FLAGS_REG))])])
> +
>  (define_insn_and_split "*<shift_insn><mode>3_doubleword"
>    [(set (match_operand:DWI 0 "register_operand" "=&r")
>         (any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
> @@ -11187,6 +11227,26 @@ (define_insn_and_split "*<rotate_insn><m
>        (clobber (reg:CC FLAGS_REG))])]
>    "operands[2] = gen_lowpart (QImode, operands[2]);")
>
> +(define_insn_and_split "*<rotate_insn><mode>3_mask_1"
> +  [(set (match_operand:SWI48 0 "nonimmediate_operand")
> +       (any_rotate:SWI48
> +         (match_operand:SWI48 1 "nonimmediate_operand")
> +         (and:QI
> +           (match_operand:QI 2 "register_operand")
> +           (match_operand:QI 3 "const_int_operand"))))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
> +   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
> +      == GET_MODE_BITSIZE (<MODE>mode)-1
> +   && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(parallel
> +     [(set (match_dup 0)
> +          (any_rotate:SWI48 (match_dup 1)
> +                            (match_dup 2)))
> +      (clobber (reg:CC FLAGS_REG))])])
> +
>  ;; Implement rotation using two double-precision
>  ;; shift instructions and a scratch register.
>
> @@ -11494,6 +11554,30 @@ (define_insn_and_split "*<btsc><mode>_ma
>        (clobber (reg:CC FLAGS_REG))])]
>    "operands[1] = gen_lowpart (QImode, operands[1]);")
>
> +(define_insn_and_split "*<btsc><mode>_mask_1"
> +  [(set (match_operand:SWI48 0 "register_operand")
> +       (any_or:SWI48
> +         (ashift:SWI48
> +           (const_int 1)
> +           (and:QI
> +             (match_operand:QI 1 "register_operand")
> +             (match_operand:QI 2 "const_int_operand")))
> +         (match_operand:SWI48 3 "register_operand")))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "TARGET_USE_BT
> +   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
> +      == GET_MODE_BITSIZE (<MODE>mode)-1
> +   && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(parallel
> +     [(set (match_dup 0)
> +          (any_or:SWI48
> +            (ashift:SWI48 (const_int 1)
> +                          (match_dup 1))
> +            (match_dup 3)))
> +      (clobber (reg:CC FLAGS_REG))])])
> +
>  (define_insn "*btr<mode>"
>    [(set (match_operand:SWI48 0 "register_operand" "=r")
>         (and:SWI48
> @@ -11535,6 +11619,30 @@ (define_insn_and_split "*btr<mode>_mask"
>        (clobber (reg:CC FLAGS_REG))])]
>    "operands[1] = gen_lowpart (QImode, operands[1]);")
>
> +(define_insn_and_split "*btr<mode>_mask_1"
> +  [(set (match_operand:SWI48 0 "register_operand")
> +       (and:SWI48
> +         (rotate:SWI48
> +           (const_int -2)
> +           (and:QI
> +             (match_operand:QI 1 "register_operand")
> +             (match_operand:QI 2 "const_int_operand")))
> +         (match_operand:SWI48 3 "register_operand")))
> +   (clobber (reg:CC FLAGS_REG))]
> +  "TARGET_USE_BT
> +   && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
> +      == GET_MODE_BITSIZE (<MODE>mode)-1
> +   && can_create_pseudo_p ()"
> +  "#"
> +  "&& 1"
> +  [(parallel
> +     [(set (match_dup 0)
> +          (and:SWI48
> +            (rotate:SWI48 (const_int -2)
> +                          (match_dup 1))
> +            (match_dup 3)))
> +      (clobber (reg:CC FLAGS_REG))])])
> +
>  ;; These instructions are never faster than the corresponding
>  ;; and/ior/xor operations when using immediate operand, so with
>  ;; 32-bit there's no point.  But in 64-bit, we can't hold the
> --- gcc/testsuite/gcc.target/i386/pr82498-1.c.jj        2017-10-12 
> 11:18:48.905128703 +0200
> +++ gcc/testsuite/gcc.target/i386/pr82498-1.c   2017-10-12 11:18:48.905128703 
> +0200
> @@ -0,0 +1,52 @@
> +/* PR target/82498 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune=generic -masm=att" } */
> +/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
> +
> +unsigned
> +f1 (unsigned x, unsigned char y)
> +{
> +  if (y == 0)
> +    return x;
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
> +}
> +
> +unsigned
> +f2 (unsigned x, unsigned y)
> +{
> +  if (y == 0)
> +    return x;
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
> +}
> +
> +unsigned
> +f3 (unsigned x, unsigned short y)
> +{
> +  if (y == 0)
> +    return x;
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
> +}
> +
> +unsigned
> +f4 (unsigned x, unsigned char y)
> +{
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
> +}
> +
> +unsigned
> +f5 (unsigned x, unsigned int y)
> +{
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
> +}
> +
> +unsigned
> +f6 (unsigned x, unsigned short y)
> +{
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
> +}
> --- gcc/testsuite/gcc.target/i386/pr82498-2.c.jj        2017-10-12 
> 12:14:53.452321121 +0200
> +++ gcc/testsuite/gcc.target/i386/pr82498-2.c   2017-10-12 12:08:53.000000000 
> +0200
> @@ -0,0 +1,46 @@
> +/* PR target/82498 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mtune=generic -masm=att" } */
> +/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
> +
> +int
> +f1 (int x, unsigned char y)
> +{
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return x >> y;
> +}
> +
> +unsigned
> +f2 (unsigned x, unsigned char y)
> +{
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return x >> y;
> +}
> +
> +unsigned
> +f3 (unsigned x, unsigned char y)
> +{
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return x << y;
> +}
> +
> +unsigned
> +f4 (unsigned x, unsigned char y)
> +{
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return x | (1U << y);
> +}
> +
> +unsigned
> +f5 (unsigned x, unsigned char y)
> +{
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return x ^ (1U << y);
> +}
> +
> +unsigned
> +f6 (unsigned x, unsigned char y)
> +{
> +  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
> +  return (x + 2) & ~(1U << y);
> +}
>
>
>         Jakub

Reply via email to