Re: [PATCH] teach emit_store_flag to use clz/ctz

Richard Guenther Fri, 27 Apr 2012 04:17:09 -0700

On Fri, 27 Apr 2012, Paolo Bonzini wrote:

> If the value at zero is outside the range [0, GET_MODE_BITSIZE (mode)),
> "A != 0" and "A == 0" can be compiled to clz/ctz followed by a subtraction
> or one's complement (only for A != 0) and a shift.  This trick can be
> used effectively on PPC (though there are other problems in the machine
> description to fix first).
> 
> Bootstrapped/regtested x86_64-pc-linux-gnu, and tested manually on PPC
> with some changes to the machine description.  Ok for mainline?


See below.

> Paolo
> 
> 2012-04-27  Paolo Bonzini  <bonz...@gnu.org>
> 
>       PR 53087
>       * expmed.c (emit_store_flag): Generalize to support cases
>       where the result is not in the sign bit.  Add a trick
>       involving the value at zero of clz or ctz.
> 
> Index: expmed.c
> ===================================================================
> --- expmed.c  (revisione 186859)
> +++ expmed.c  (copia locale)
> @@ -5419,6 +5419,7 @@
>    enum rtx_code rcode;
>    rtx subtarget;
>    rtx tem, last, trueval;
> +  int shift;
>  
>    tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, 
> normalizep,
>                          target_mode);
> @@ -5612,10 +5613,11 @@
>                          false) <= 1 || (code != LE && code != GT))))
>      return 0;
>  
> -  /* Try to put the result of the comparison in the sign bit.  Assume we 
> can't
> -     do the necessary operation below.  */
> +  /* Assume we can't do the necessary operation below, so try to put the
> +     result of the comparison in a known bit, given by SHIFT.  */
>  
>    tem = 0;
> +  shift = GET_MODE_BITSIZE (mode) - 1;
>  
>    /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
>       the sign bit set.  */
> @@ -5650,25 +5652,22 @@
>  
>    if (code == EQ || code == NE)
>      {
> -      /* For EQ or NE, one way to do the comparison is to apply an operation
> -      that converts the operand into a positive number if it is nonzero
> -      or zero if it was originally zero.  Then, for EQ, we subtract 1 and
> -      for NE we negate.  This puts the result in the sign bit.  Then we
> -      normalize with a shift, if needed.
> +      /* Look for an operation that converts the operand into a positive
> +      number if it is nonzero or zero if it was originally zero.  Then,
> +      for EQ, we subtract 1 and for NE we negate.  This puts the result
> +      in the sign bit.  Then we normalize with a shift, if needed.
>  
> -      Two operations that can do the above actions are ABS and FFS, so try
> -      them.  If that doesn't work, and MODE is smaller than a full word,
> -      we can use zero-extension to the wider mode (an unsigned conversion)
> -      as the operation.  */
> -
> -      /* Note that ABS doesn't yield a positive number for INT_MIN, but
> +      ABS can do this; it doesn't yield a positive number for INT_MIN, but
>        that is compensated by the subsequent overflow when subtracting
> -      one / negating.  */
> +      one / negating.  If that doesn't work, and MODE is smaller than
> +      a full word, we can use zero-extension to the wider mode (an
> +      unsigned conversion) as the operation.
>  
> +      FFS could also do this, but we use a more versatile trick with
> +      CLZ/CTZ below.  */
> +
>        if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
>       tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
> -      else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
> -     tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
>        else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
>       {
>         tem = convert_modes (word_mode, mode, op0, 1);
> @@ -5684,6 +5683,69 @@
>           tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
>       }
>  
> +      if (tem == 0)
> +        {
> +          int clz_zero, ctz_zero, if_zero;
> +
> +          /* For EQ or NE, clz and ctz may give a unique value for zero that 
> is
> +             not in the [0, GET_MODE_BITSIZE (mode)) range.  The result can 
> be
> +             massaged to put the result in a known bit.  */
> +
> +          if (!CLZ_DEFINED_VALUE_AT_ZERO (mode, clz_zero))
> +            clz_zero = 0;
> +          if (!CTZ_DEFINED_VALUE_AT_ZERO (mode, ctz_zero))
> +            ctz_zero = 0;

In optabs.c we compare the CTZ_DEFINED_VALUE_AT_ZERO against two,
is != 0 really what you want here?  The docs suggest to me
that as you are using the optab below you should compare against two, too.

What about cost considerations?  We only seem to have the general
"branches are expensive" metric - but ctz/clz may be prohibitely expensive
themselves, no?

Richard.

> +          /* Prefer a negative result if normalize == -1, since that does not
> +             require an additional left shift.  */
> +
> +          if (normalizep == -1)
> +            {
> +              if (ctz_zero < 0 && clz_zero >= GET_MODE_BITSIZE (mode))
> +                clz_zero = 0;
> +              if (clz_zero < 0 && ctz_zero >= GET_MODE_BITSIZE (mode))
> +                ctz_zero = 0;
> +            }
> +
> +          if ((clz_zero < 0 || clz_zero >= GET_MODE_BITSIZE (mode))
> +              && optab_handler (clz_optab, mode) != CODE_FOR_nothing)
> +            {
> +              tem = expand_unop (mode, clz_optab, op0, subtarget, 1);
> +              if_zero = clz_zero;
> +            }
> +          if (tem == 0
> +              && (ctz_zero < 0 || ctz_zero >= GET_MODE_BITSIZE (mode))
> +              && optab_handler (ctz_optab, mode) != CODE_FOR_nothing)
> +            {
> +              tem = expand_unop (mode, ctz_optab, op0, subtarget, 1);
> +              if_zero = ctz_zero;
> +            }
> +
> +          if (tem != 0)
> +            {
> +              if (code == NE)
> +                {
> +                  /* Place the result in the sign bit.  For negative value at
> +                     zero, compute one's complement; for positive value at
> +                     zero, subtract the mode size.  */
> +                  if (if_zero > 0)
> +                    tem = expand_binop (mode, sub_optab, tem,
> +                                        GEN_INT (GET_MODE_BITSIZE (mode)),
> +                                        subtarget, 0, OPTAB_WIDEN);
> +                  else
> +                    tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 
> 0);
> +                }
> +              else
> +                {
> +                  /* The result is already in some bit, but it may not be the
> +                     sign bit.  */
> +                  if (if_zero > 0)
> +                    shift = floor_log2 (if_zero);
> +                }
> +              goto result_in_known_bit;
> +            }
> +        }
> +
>        /* If we couldn't do it that way, for NE we can "or" the two's 
> complement
>        of the value with itself.  For EQ, we take the one's complement of
>        that "or", which is an extra insn, so we only handle EQ if branches
> @@ -5706,10 +5768,17 @@
>       }
>      }
>  
> -  if (tem && normalizep)
> -    tem = expand_shift (RSHIFT_EXPR, mode, tem,
> -                     GET_MODE_BITSIZE (mode) - 1,
> -                     subtarget, normalizep == 1);
> +result_in_known_bit:
> +  if (tem && normalizep == -1 && shift < GET_MODE_BITSIZE (mode) - 1)
> +    {
> +      tem = expand_shift (LSHIFT_EXPR, mode, tem,
> +                          GET_MODE_BITSIZE (mode) - shift - 1,
> +                          subtarget, normalizep == 1);
> +      shift = GET_MODE_BITSIZE (mode) - 1;
> +    }
> +  if (tem)
> +    tem = expand_shift (RSHIFT_EXPR, mode, tem, shift,
> +                        subtarget, normalizep == 1);
>  
>    if (tem)
>      {
> Index: config/rs6000/rs6000.md
> ===================================================================
> --- config/rs6000/rs6000.md   (revisione 186859)
> +++ config/rs6000/rs6000.md   (copia locale)
> @@ -2129,7 +2129,7 @@
>  (define_expand "abssi2"
>    [(set (match_operand:SI 0 "gpc_reg_operand" "")
>       (abs:SI (match_operand:SI 1 "gpc_reg_operand" "")))]
> -  ""
> +  "TARGET_ISEL || TARGET_POWER"
>    "
>  {
>    if (TARGET_ISEL)
> @@ -2137,11 +2137,6 @@
>        emit_insn (gen_abssi2_isel (operands[0], operands[1]));
>        DONE;
>      }
> -  else if (! TARGET_POWER)
> -    {
> -      emit_insn (gen_abssi2_nopower (operands[0], operands[1]));
> -      DONE;
> -    }
>  }")
>  
>  (define_insn "*abssi2_power"
> @@ -2188,36 +2183,12 @@
>                         (match_dup 2)))]
>    "")
>  
> -(define_insn_and_split "abssi2_nopower"
> -  [(set (match_operand:SI 0 "gpc_reg_operand" "=&r,r")
> -        (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r,0")))
> -   (clobber (match_scratch:SI 2 "=&r,&r"))]
> -  "! TARGET_POWER && ! TARGET_ISEL"
> -  "#"
> -  "&& reload_completed"
> -  [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31)))
> -   (set (match_dup 0) (xor:SI (match_dup 2) (match_dup 1)))
> -   (set (match_dup 0) (minus:SI (match_dup 0) (match_dup 2)))]
> -  "")
> -
>  (define_insn "*nabs_power"
>    [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
>       (neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r"))))]
>    "TARGET_POWER"
>    "nabs %0,%1")
>  
> -(define_insn_and_split "*nabs_nopower"
> -  [(set (match_operand:SI 0 "gpc_reg_operand" "=&r,r")
> -        (neg:SI (abs:SI (match_operand:SI 1 "gpc_reg_operand" "r,0"))))
> -   (clobber (match_scratch:SI 2 "=&r,&r"))]
> -  "! TARGET_POWER"
> -  "#"
> -  "&& reload_completed"
> -  [(set (match_dup 2) (ashiftrt:SI (match_dup 1) (const_int 31)))
> -   (set (match_dup 0) (xor:SI (match_dup 2) (match_dup 1)))
> -   (set (match_dup 0) (minus:SI (match_dup 2) (match_dup 0)))]
> -  "")
> -
>  (define_expand "neg<mode>2"
>    [(set (match_operand:SDI 0 "gpc_reg_operand" "")
>       (neg:SDI (match_operand:SDI 1 "gpc_reg_operand" "")))]
> @@ -7710,40 +7681,13 @@
>  (define_expand "absdi2"
>    [(set (match_operand:DI 0 "gpc_reg_operand" "")
>       (abs:DI (match_operand:DI 1 "gpc_reg_operand" "")))]
> -  "TARGET_POWERPC64"
> +  "TARGET_POWERPC64 && TARGET_ISEL"
>    "
>  {
> -  if (TARGET_ISEL)
> -    emit_insn (gen_absdi2_isel (operands[0], operands[1]));
> -  else
> -    emit_insn (gen_absdi2_internal (operands[0], operands[1]));
> +  emit_insn (gen_absdi2_isel (operands[0], operands[1]));
>    DONE;
>  }")
>  
> -(define_insn_and_split "absdi2_internal"
> -  [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,r")
> -        (abs:DI (match_operand:DI 1 "gpc_reg_operand" "r,0")))
> -   (clobber (match_scratch:DI 2 "=&r,&r"))]
> -  "TARGET_POWERPC64 && !TARGET_ISEL"
> -  "#"
> -  "&& reload_completed"
> -  [(set (match_dup 2) (ashiftrt:DI (match_dup 1) (const_int 63)))
> -   (set (match_dup 0) (xor:DI (match_dup 2) (match_dup 1)))
> -   (set (match_dup 0) (minus:DI (match_dup 0) (match_dup 2)))]
> -  "")
> -
> -(define_insn_and_split "*nabsdi2"
> -  [(set (match_operand:DI 0 "gpc_reg_operand" "=&r,r")
> -        (neg:DI (abs:DI (match_operand:DI 1 "gpc_reg_operand" "r,0"))))
> -   (clobber (match_scratch:DI 2 "=&r,&r"))]
> -  "TARGET_POWERPC64 && !TARGET_ISEL"
> -  "#"
> -  "&& reload_completed"
> -  [(set (match_dup 2) (ashiftrt:DI (match_dup 1) (const_int 63)))
> -   (set (match_dup 0) (xor:DI (match_dup 2) (match_dup 1)))
> -   (set (match_dup 0) (minus:DI (match_dup 2) (match_dup 0)))]
> -  "")
> -
>  (define_insn "muldi3"
>    [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
>          (mult:DI (match_operand:DI 1 "gpc_reg_operand" "%r,r")
> @@ -13100,13 +13044,9 @@
>                                   operands[2], operands[3]);
>      }
>  
> -  /* For SNE, we would prefer that the xor/abs sequence be used for integers.
> -     For SEQ, likewise, except that comparisons with zero should be done
> -     with an scc insns.  However, due to the order that combine see the
> -     resulting insns, we must, in fact, allow SEQ for integers.  Fail in
> -     the cases we don't want to handle or are best handled by portable
> -     code.  */
> -  if (GET_CODE (operands[1]) == NE)
> +  /* Fail in the cases we don't want to handle or are best handled by
> +     portable code.  */
> +  if (GET_CODE (operands[1]) == NE || GET_CODE (operands[1]) == EQ)
>      FAIL;
>    if ((GET_CODE (operands[1]) == LT || GET_CODE (operands[1]) == LE
>         || GET_CODE (operands[1]) == GT || GET_CODE (operands[1]) == GE)
> 
> 

-- 
Richard Guenther <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix ImendÃ¶rffer

Re: [PATCH] teach emit_store_flag to use clz/ctz

Reply via email to