On Sat, Oct 6, 2018 at 12:11 AM Jason Ekstrand <ja...@jlekstrand.net> wrote:
>
> From: Marek Olšák <marek.ol...@amd.com>
>
> ---
>  src/util/fast_idiv_by_const.c | 21 +++++++++++++++++++++
>  src/util/fast_idiv_by_const.h |  5 +++--
>  2 files changed, 24 insertions(+), 2 deletions(-)
>
> diff --git a/src/util/fast_idiv_by_const.c b/src/util/fast_idiv_by_const.c
> index 65a9e640789..7b93316268c 100644
> --- a/src/util/fast_idiv_by_const.c
> +++ b/src/util/fast_idiv_by_const.c
> @@ -52,6 +52,27 @@ util_compute_fast_udiv_info(uint64_t D, unsigned num_bits, unsigned UINT_BITS)
>     /* The eventual result */
>     struct util_fast_udiv_info result;
>
> +   if (util_is_power_of_two_or_zero64(D)) {
> +      unsigned div_shift = util_logbase2_64(D);
> +
> +      if (div_shift) {
> +         /* Dividing by a power of two. */
> +         result.multiplier = 1ull << (UINT_BITS - div_shift);
> +         result.pre_shift = 0;
> +         result.post_shift = 0;
> +         result.increment = 0;
> +         return result;
> +      } else {
> +         /* Dividing by 1. */
> +         /* Assuming: floor((num + 1) * (2^32 - 1) / 2^32) = num */
> +         result.multiplier = UINT_BITS == 64 ? UINT64_MAX :
> +                                               (1ull << UINT_BITS) - 1;
> +         result.pre_shift = 0;
> +         result.post_shift = 0;
> +         result.increment = 1;
> +         return result;
> +      }
> +   }
>
>     /* The extra shift implicit in the difference between UINT_BITS and num_bits
>      */
> diff --git a/src/util/fast_idiv_by_const.h b/src/util/fast_idiv_by_const.h
> index 231311f84be..3363fb9ee71 100644
> --- a/src/util/fast_idiv_by_const.h
> +++ b/src/util/fast_idiv_by_const.h
> @@ -98,8 +98,9 @@ util_compute_fast_sdiv_info(int64_t D, unsigned SINT_BITS);
>   *   emit("result >>>= UINT_BITS")
>   *   if m.post_shift > 0: emit("result >>>= m.post_shift")
>   *
> - * The shifts by UINT_BITS may be "free" if the high half of the full multiply
> - * is put in a separate register.
> + * This second version works even if D is a power of two.  The shifts by

I think you meant to say that the second version works even if D is 1.

Marek
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to