Am 25.04.2016 um 15:44 schrieb Jose Fonseca: > Looks good AFAICT. Can there be an impact in performance? Yes, but I think it should be fairly minimal for the affected modes - at least int compare / min instructions are cheap (too bad we only have unsigned min with/max with sse41 but not compare). mirror_repeat (using coord_mirror) actually might not be optimal - probably could use unsigned min/max later there instead of signed ones and skip the extra float min. But it's always a question what the cpu actually supports - if there's only sse2 integer min/max have to be emulated (and since there's no unsigned comparisons, emulation gets more complex in this case), float min/max not.
> > Reviewed-by: Jose Fonseca <jfons...@vmware.com> > > I also think we should have piglit testcases for these things. Would it > be easy to modify one of the existing textrwap to use Nans? We need to > use BGRA8 and something else (e.g., RGBA32F) to covert both AoS/SoA paths. Sounds like a good idea (I suppose though it's really only interesting for sw based drivers). Roland > > Jose > > > > On 22/04/16 14:33, srol...@vmware.com wrote: >> From: Roland Scheidegger <srol...@vmware.com> >> >> Some cases (especially these using fract for coord wrapping) did not >> handle >> NaNs (or Infs) correctly - the following code assumed the fract result >> could not be outside [0,1], but if the input is a NaN (or +-Inf) the >> fract >> result was NaN - which then could produce out-of-bound offsets. >> >> (Note that the explicit NaN behavior changes for min/max on x86 sse don't >> result in actual changes in the generated jit code, but may on other >> architectures. Found by looking through all the wrap functions.) >> >> This fixes https://bugs.freedesktop.org/show_bug.cgi?id=94955 >> >> Cc: "11.1 11.2" <mesa-sta...@lists.freedesktop.org> >> --- >> src/gallium/auxiliary/gallivm/lp_bld_arit.c | 9 ++++--- >> src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 13 ++++++++- >> src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 33 >> +++++++++++++++++------ >> 3 files changed, 42 insertions(+), 13 deletions(-) >> >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c >> b/src/gallium/auxiliary/gallivm/lp_bld_arit.c >> index beff414..17cf296 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c >> @@ -2069,8 +2069,8 @@ lp_build_fract(struct lp_build_context *bld, >> >> >> /** >> - * Prevent returning a fractional part of 1.0 for very small negative >> values of >> - * 'a' by clamping against 0.99999(9). >> + * Prevent returning 1.0 for very small negative values of 'a' by >> clamping >> + * against 0.99999(9). (Will also return that value for NaNs.) >> */ >> static inline LLVMValueRef >> clamp_fract(struct lp_build_context *bld, LLVMValueRef fract) >> @@ -2080,13 +2080,14 @@ clamp_fract(struct lp_build_context *bld, >> LLVMValueRef fract) >> /* this is the largest number smaller than 1.0 representable as >> float */ >> max = lp_build_const_vec(bld->gallivm, bld->type, >> 1.0 - 1.0/(1LL << >> (lp_mantissa(bld->type) + 1))); >> - return lp_build_min(bld, fract, max); >> + return lp_build_min_ext(bld, fract, max, >> + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); > >> } >> >> >> /** >> * Same as lp_build_fract, but guarantees that the result is always >> smaller >> - * than one. >> + * than one. Will also return the smaller-than-one value for infs, NaNs. >> */ >> LLVMValueRef >> lp_build_fract_safe(struct lp_build_context *bld, >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c >> b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c >> index 729c5b8..6bf92c8 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c >> @@ -246,6 +246,12 @@ lp_build_coord_repeat_npot_linear_int(struct >> lp_build_sample_context *bld, >> mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, >> PIPE_FUNC_LESS, *coord0_i, >> int_coord_bld->zero); >> *coord0_i = lp_build_select(int_coord_bld, mask, >> length_minus_one, *coord0_i); >> + /* >> + * We should never get values too large - except if coord was nan >> or inf, >> + * in which case things go terribly wrong... >> + * Alternatively, could use fract_safe above... >> + */ >> + *coord0_i = lp_build_min(int_coord_bld, *coord0_i, length_minus_one); >> } >> >> >> @@ -490,6 +496,10 @@ lp_build_sample_wrap_linear_float(struct >> lp_build_sample_context *bld, >> *coord1 = lp_build_add(coord_bld, coord, half); >> coord = lp_build_sub(coord_bld, coord, half); >> *weight = lp_build_fract(coord_bld, coord); >> + /* >> + * It is important for this comparison to be unordered >> + * (or need fract_safe above). >> + */ >> mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, >> PIPE_FUNC_LESS, coord, >> coord_bld->zero); >> *coord0 = lp_build_select(coord_bld, mask, >> length_minus_one, coord); >> @@ -514,7 +524,8 @@ lp_build_sample_wrap_linear_float(struct >> lp_build_sample_context *bld, >> coord = lp_build_sub(coord_bld, coord, half); >> } >> /* clamp to [0, length - 1] */ >> - coord = lp_build_min(coord_bld, coord, length_minus_one); >> + coord = lp_build_min_ext(coord_bld, coord, length_minus_one, >> + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); >> coord = lp_build_max(coord_bld, coord, coord_bld->zero); >> *coord1 = lp_build_add(coord_bld, coord, coord_bld->one); >> /* convert to int, compute lerp weight */ >> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c >> b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c >> index 1727105..ace24fd 100644 >> --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c >> +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c >> @@ -228,11 +228,15 @@ lp_build_coord_mirror(struct >> lp_build_sample_context *bld, >> LLVMValueRef fract, flr, isOdd; >> >> lp_build_ifloor_fract(coord_bld, coord, &flr, &fract); >> + /* kill off NaNs */ >> + fract = lp_build_min_ext(coord_bld, fract, coord_bld->zero, >> + GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); >> >> /* isOdd = flr & 1 */ >> isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, >> int_coord_bld->one, ""); >> >> /* make coord positive or negative depending on isOdd */ >> + /* XXX slight overkill masking out sign bit is unnecessary */ >> coord = lp_build_set_sign(coord_bld, fract, isOdd); >> >> /* convert isOdd to float */ >> @@ -272,10 +276,15 @@ lp_build_coord_repeat_npot_linear(struct >> lp_build_sample_context *bld, >> * we avoided the 0.5/length division before the repeat wrap, >> * now need to fix up edge cases with selects >> */ >> + /* >> + * Note we do a float (unordered) compare so we can eliminate NaNs. >> + * (Otherwise would need fract_safe above). >> + */ >> + mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, >> + PIPE_FUNC_LESS, coord_f, coord_bld->zero); >> + >> /* convert to int, compute lerp weight */ >> lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f); >> - mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, >> - PIPE_FUNC_LESS, *coord0_i, >> int_coord_bld->zero); >> *coord0_i = lp_build_select(int_coord_bld, mask, >> length_minus_one, *coord0_i); >> } >> >> @@ -375,7 +384,8 @@ lp_build_sample_wrap_linear(struct >> lp_build_sample_context *bld, >> } >> >> /* clamp to length max */ >> - coord = lp_build_min(coord_bld, coord, length_f); >> + coord = lp_build_min_ext(coord_bld, coord, length_f, >> + >> GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); >> /* subtract 0.5 */ >> coord = lp_build_sub(coord_bld, coord, half); >> /* clamp to [0, length - 0.5] */ >> @@ -398,7 +408,7 @@ lp_build_sample_wrap_linear(struct >> lp_build_sample_context *bld, >> coord = lp_build_add(coord_bld, coord, offset); >> } >> /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */ >> - /* can skip clamp (though might not work for very large coord >> values */ >> + /* can skip clamp (though might not work for very large coord >> values) */ >> coord = lp_build_sub(coord_bld, coord, half); >> /* convert to int, compute lerp weight */ >> lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); >> @@ -465,7 +475,8 @@ lp_build_sample_wrap_linear(struct >> lp_build_sample_context *bld, >> coord = lp_build_abs(coord_bld, coord); >> >> /* clamp to length max */ >> - coord = lp_build_min(coord_bld, coord, length_f); >> + coord = lp_build_min_ext(coord_bld, coord, length_f, >> + >> GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); >> /* subtract 0.5 */ >> coord = lp_build_sub(coord_bld, coord, half); >> /* clamp to [0, length - 0.5] */ >> @@ -628,9 +639,15 @@ lp_build_sample_wrap_nearest(struct >> lp_build_sample_context *bld, >> >> /* itrunc == ifloor here */ >> icoord = lp_build_itrunc(coord_bld, coord); >> - >> - /* clamp to [0, length - 1] */ >> - icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); >> + /* >> + * Use unsigned min due to possible undef values (NaNs, overflow) >> + */ >> + { >> + struct lp_build_context abs_coord_bld = *int_coord_bld; >> + abs_coord_bld.type.sign = FALSE; >> + /* clamp to [0, length - 1] */ >> + icoord = lp_build_min(&abs_coord_bld, icoord, >> length_minus_one); >> + } >> break; >> >> case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: >> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev