Am 25.04.2016 um 15:44 schrieb Jose Fonseca:
> Looks good AFAICT.  Can there be an impact in performance?
Yes, but I think it should be fairly minimal for the affected modes - at
least int compare / min instructions are cheap (too bad we only have
unsigned min with/max with sse41 but not compare).
mirror_repeat (using coord_mirror) actually might not be optimal -
probably could use unsigned min/max later there instead of signed ones
and skip the extra float min. But it's always a question what the cpu
actually supports - if there's only sse2 integer min/max have to be
emulated (and since there's no unsigned comparisons, emulation gets more
complex in this case), float min/max not.

> 
> Reviewed-by: Jose Fonseca <jfons...@vmware.com>
> 
> I also think we should have piglit testcases for these things.  Would it
> be easy to modify one of the existing textrwap to use Nans?  We need to
> use BGRA8 and something else (e.g., RGBA32F) to covert both AoS/SoA paths.
Sounds like a good idea (I suppose though it's really only interesting
for sw based drivers).

Roland


> 
> Jose
> 
> 
> 
> On 22/04/16 14:33, srol...@vmware.com wrote:
>> From: Roland Scheidegger <srol...@vmware.com>
>>
>> Some cases (especially these using fract for coord wrapping) did not
>> handle
>> NaNs (or Infs) correctly - the following code assumed the fract result
>> could not be outside [0,1], but if the input is a NaN (or +-Inf) the
>> fract
>> result was NaN - which then could produce out-of-bound offsets.
>>
>> (Note that the explicit NaN behavior changes for min/max on x86 sse don't
>> result in actual changes in the generated jit code, but may on other
>> architectures. Found by looking through all the wrap functions.)
>>
>> This fixes https://bugs.freedesktop.org/show_bug.cgi?id=94955
>>
>> Cc: "11.1 11.2" <mesa-sta...@lists.freedesktop.org>
>> ---
>>   src/gallium/auxiliary/gallivm/lp_bld_arit.c       |  9 ++++---
>>   src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 13 ++++++++-
>>   src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 33
>> +++++++++++++++++------
>>   3 files changed, 42 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
>> index beff414..17cf296 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
>> @@ -2069,8 +2069,8 @@ lp_build_fract(struct lp_build_context *bld,
>>
>>
>>   /**
>> - * Prevent returning a fractional part of 1.0 for very small negative
>> values of
>> - * 'a' by clamping against 0.99999(9).
>> + * Prevent returning 1.0 for very small negative values of 'a' by
>> clamping
>> + * against 0.99999(9). (Will also return that value for NaNs.)
>>    */
>>   static inline LLVMValueRef
>>   clamp_fract(struct lp_build_context *bld, LLVMValueRef fract)
>> @@ -2080,13 +2080,14 @@ clamp_fract(struct lp_build_context *bld,
>> LLVMValueRef fract)
>>      /* this is the largest number smaller than 1.0 representable as
>> float */
>>      max = lp_build_const_vec(bld->gallivm, bld->type,
>>                               1.0 - 1.0/(1LL <<
>> (lp_mantissa(bld->type) + 1)));
>> -   return lp_build_min(bld, fract, max);
>> +   return lp_build_min_ext(bld, fract, max,
>> +                           GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
> 
>>   }
>>
>>
>>   /**
>>    * Same as lp_build_fract, but guarantees that the result is always
>> smaller
>> - * than one.
>> + * than one. Will also return the smaller-than-one value for infs, NaNs.
>>    */
>>   LLVMValueRef
>>   lp_build_fract_safe(struct lp_build_context *bld,
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
>> index 729c5b8..6bf92c8 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
>> @@ -246,6 +246,12 @@ lp_build_coord_repeat_npot_linear_int(struct
>> lp_build_sample_context *bld,
>>      mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
>>                              PIPE_FUNC_LESS, *coord0_i,
>> int_coord_bld->zero);
>>      *coord0_i = lp_build_select(int_coord_bld, mask,
>> length_minus_one, *coord0_i);
>> +   /*
>> +    * We should never get values too large - except if coord was nan
>> or inf,
>> +    * in which case things go terribly wrong...
>> +    * Alternatively, could use fract_safe above...
>> +    */
>> +   *coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one);
>>   }
>>
>>
>> @@ -490,6 +496,10 @@ lp_build_sample_wrap_linear_float(struct
>> lp_build_sample_context *bld,
>>            *coord1 = lp_build_add(coord_bld, coord, half);
>>            coord = lp_build_sub(coord_bld, coord, half);
>>            *weight = lp_build_fract(coord_bld, coord);
>> +         /*
>> +          * It is important for this comparison to be unordered
>> +          * (or need fract_safe above).
>> +          */
>>            mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
>>                                    PIPE_FUNC_LESS, coord,
>> coord_bld->zero);
>>            *coord0 = lp_build_select(coord_bld, mask,
>> length_minus_one, coord);
>> @@ -514,7 +524,8 @@ lp_build_sample_wrap_linear_float(struct
>> lp_build_sample_context *bld,
>>            coord = lp_build_sub(coord_bld, coord, half);
>>         }
>>         /* clamp to [0, length - 1] */
>> -      coord = lp_build_min(coord_bld, coord, length_minus_one);
>> +      coord = lp_build_min_ext(coord_bld, coord, length_minus_one,
>> +                               GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
>>         coord = lp_build_max(coord_bld, coord, coord_bld->zero);
>>         *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
>>         /* convert to int, compute lerp weight */
>> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
>> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
>> index 1727105..ace24fd 100644
>> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
>> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
>> @@ -228,11 +228,15 @@ lp_build_coord_mirror(struct
>> lp_build_sample_context *bld,
>>      LLVMValueRef fract, flr, isOdd;
>>
>>      lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
>> +   /* kill off NaNs */
>> +   fract = lp_build_min_ext(coord_bld, fract, coord_bld->zero,
>> +                            GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
>>
>>      /* isOdd = flr & 1 */
>>      isOdd = LLVMBuildAnd(bld->gallivm->builder, flr,
>> int_coord_bld->one, "");
>>
>>      /* make coord positive or negative depending on isOdd */
>> +   /* XXX slight overkill masking out sign bit is unnecessary */
>>      coord = lp_build_set_sign(coord_bld, fract, isOdd);
>>
>>      /* convert isOdd to float */
>> @@ -272,10 +276,15 @@ lp_build_coord_repeat_npot_linear(struct
>> lp_build_sample_context *bld,
>>       * we avoided the 0.5/length division before the repeat wrap,
>>       * now need to fix up edge cases with selects
>>       */
>> +   /*
>> +    * Note we do a float (unordered) compare so we can eliminate NaNs.
>> +    * (Otherwise would need fract_safe above).
>> +    */
>> +   mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
>> +                           PIPE_FUNC_LESS, coord_f, coord_bld->zero);
>> +
>>      /* convert to int, compute lerp weight */
>>      lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
>> -   mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
>> -                           PIPE_FUNC_LESS, *coord0_i,
>> int_coord_bld->zero);
>>      *coord0_i = lp_build_select(int_coord_bld, mask,
>> length_minus_one, *coord0_i);
>>   }
>>
>> @@ -375,7 +384,8 @@ lp_build_sample_wrap_linear(struct
>> lp_build_sample_context *bld,
>>            }
>>
>>            /* clamp to length max */
>> -         coord = lp_build_min(coord_bld, coord, length_f);
>> +         coord = lp_build_min_ext(coord_bld, coord, length_f,
>> +                                 
>> GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
>>            /* subtract 0.5 */
>>            coord = lp_build_sub(coord_bld, coord, half);
>>            /* clamp to [0, length - 0.5] */
>> @@ -398,7 +408,7 @@ lp_build_sample_wrap_linear(struct
>> lp_build_sample_context *bld,
>>            coord = lp_build_add(coord_bld, coord, offset);
>>         }
>>         /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
>> -      /* can skip clamp (though might not work for very large coord
>> values */
>> +      /* can skip clamp (though might not work for very large coord
>> values) */
>>         coord = lp_build_sub(coord_bld, coord, half);
>>         /* convert to int, compute lerp weight */
>>         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
>> @@ -465,7 +475,8 @@ lp_build_sample_wrap_linear(struct
>> lp_build_sample_context *bld,
>>            coord = lp_build_abs(coord_bld, coord);
>>
>>            /* clamp to length max */
>> -         coord = lp_build_min(coord_bld, coord, length_f);
>> +         coord = lp_build_min_ext(coord_bld, coord, length_f,
>> +                                 
>> GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
>>            /* subtract 0.5 */
>>            coord = lp_build_sub(coord_bld, coord, half);
>>            /* clamp to [0, length - 0.5] */
>> @@ -628,9 +639,15 @@ lp_build_sample_wrap_nearest(struct
>> lp_build_sample_context *bld,
>>
>>         /* itrunc == ifloor here */
>>         icoord = lp_build_itrunc(coord_bld, coord);
>> -
>> -      /* clamp to [0, length - 1] */
>> -      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
>> +      /*
>> +       * Use unsigned min due to possible undef values (NaNs, overflow)
>> +       */
>> +      {
>> +         struct lp_build_context abs_coord_bld = *int_coord_bld;
>> +         abs_coord_bld.type.sign = FALSE;
>> +         /* clamp to [0, length - 1] */
>> +         icoord = lp_build_min(&abs_coord_bld, icoord,
>> length_minus_one);
>> +      }
>>         break;
>>
>>      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
>>
> 

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to