On Fri, 2015-09-18 at 15:02 +0300, Tapani Pälli wrote: > Fixes bugs exposed by commit > 2b1cdb0eddb73f62e4848d4b64840067f1f70865 in: > ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_frag > > No regressions observed in deqp, CTS or Piglit. > > v2: address review feedback from Iago Toral: > - move rho calculation to else branch > - optimize dx and dy calculation > - fix documentation inconsistensies > > Signed-off-by: Tapani Pälli <tapani.pa...@intel.com> > Signed-off-by: Kevin Rogovin <kevin.rogo...@intel.com> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91114 > Cc: "10.6 11.0" <mesa-sta...@lists.freedesktop.org> > --- > .../dri/i965/brw_lower_texture_gradients.cpp | 200 > +++++++++++++++++++-- > 1 file changed, 181 insertions(+), 19 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp > b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp > index 7a5f983..03dc021 100644 > --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp > +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp > @@ -48,6 +48,7 @@ public: > > private: > void emit(ir_variable *, ir_rvalue *); > + ir_variable *temp(void *ctx, const glsl_type *type, const char *name); > }; > > /** > @@ -60,6 +61,17 @@ lower_texture_grad_visitor::emit(ir_variable *var, > ir_rvalue *value) > base_ir->insert_before(assign(var, value)); > } > > +/** > + * Emit a temporary variable declaration > + */ > +ir_variable * > +lower_texture_grad_visitor::temp(void *ctx, const glsl_type *type, const > char *name) > +{ > + ir_variable *var = new(ctx) ir_variable(type, name, ir_var_temporary); > + base_ir->insert_before(var); > + return var; > +} > + > static const glsl_type * > txs_type(const glsl_type *type) > { > @@ -144,28 +156,178 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir) > new(mem_ctx) ir_variable(grad_type, "dPdy", ir_var_temporary); > emit(dPdy, mul(size, ir->lod_info.grad.dPdy)); > > - /* Calculate rho from equation 3.20 of the GL 3.0 specification. */ > - ir_rvalue *rho; > - if (dPdx->type->is_scalar()) { > - rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx), > - expr(ir_unop_abs, dPdy)); > - } else { > - rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)), > - expr(ir_unop_sqrt, dot(dPdy, dPdy))); > - } > - > - /* lambda_base = log2(rho). We're ignoring GL state biases for now. > - * > - * For cube maps the result of these formulas is giving us a value of rho > - * that is twice the value we should use, so divide it by 2 or, > - * alternatively, remove one unit from the result of the log2 computation. > - */ > ir->op = ir_txl; > if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { > - ir->lod_info.lod = expr(ir_binop_add, > - expr(ir_unop_log2, rho), > - new(mem_ctx) ir_constant(-1.0f)); > + /* Cubemap texture lookups first generate a texture coordinate > normalized > + * to [-1, 1] on the appropiate face. The appropiate face is determined > + * by which component has largest magnitude and its sign. The texture > + * coordinate is the quotient of the remaining texture coordinates > against > + * that absolute value of the component of largest magnitude. This > + * division requires that the computing of the derivative of the texel > + * coordinate must use the quotient rule. The high level GLSL code is > as > + * follows: > + * > + * Step 1: selection > + * > + * vec3 abs_p, Q, dQdx, dQdy; > + * abs_p = abs(ir->coordinate); > + * if (abs_p.x >= max(abs_p.y, abs_p.z)) { > + * Q = ir->coordinate.yzx; > + * dQdx = ir->lod_info.grad.dPdx.yzx; > + * dQdy = ir->lod_info.grad.dPdy.yzx; > + * } > + * if (abs_p.y >= max(abs_p.x, abs_p.z)) { > + * Q = ir->coordinate.xzy; > + * dQdx = ir->lod_info.grad.dPdx.xzy; > + * dQdy = ir->lod_info.grad.dPdy.xzy; > + * } > + * if (abs_p.z >= max(abs_p.x, abs_p.y)) { > + * Q = ir->coordinate; > + * dQdx = ir->lod_info.grad.dPdx; > + * dQdy = ir->lod_info.grad.dPdy; > + * } > + * > + * Step 2: use quotient rule to compute derivative. The normalized to > + * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We > are > + * only concerned with the magnitudes of the derivatives whose values > are > + * not affected by the sign. We drop the sign from the computation. > + * > + * vec2 dx, dy; > + * float recip; > + * > + * recip = 1.0 / Q.z; > + * dx = recip * ( dqdx.xy - Q.xy * (dQdx.z * recip) ); > + * dy = recip * ( dqdy.xy - Q.xy * (dQdy.z * recip) ); ^^^^^^^^ dQdx and dQdy in the 2 lines above
> + * Step 3: compute LOD. At this point we have the derivatives of the > + * texture coordinates normalized to [-1,1]. We take the LOD to be > + * result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L) > + * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L) > + * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L) > + * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy)))) > + * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy))) > + * where L is the dimension of the cubemap. The code is: > + * > + * float M, result; > + * M = max(dot(dx, dx), dot(dy, dy)); > + * L = textureSize(sampler, 0).x; > + * result = -1.0 + 0.5 * log2(L * L * M); > + */ > + > +/* Helpers to make code more human readable. */ > +#define EMIT(instr) base_ir->insert_before(instr) > +#define THEN(irif, instr) irif->then_instructions.push_tail(instr) > +#define CLONE(x) x->clone(mem_ctx, NULL) > + > + ir_variable *abs_p = temp(mem_ctx, glsl_type::vec3_type, "abs_p"); > + > + EMIT(assign(abs_p, swizzle_for_size(abs(CLONE(ir->coordinate)), 3))); > + > + ir_variable *Q = temp(mem_ctx, glsl_type::vec3_type, "Q"); > + ir_variable *dQdx = temp(mem_ctx, glsl_type::vec3_type, "dQdx"); > + ir_variable *dQdy = temp(mem_ctx, glsl_type::vec3_type, "dQdy"); > + > + /* unmodified dPdx, dPdy values */ > + ir_rvalue *dPdx = ir->lod_info.grad.dPdx; > + ir_rvalue *dPdy = ir->lod_info.grad.dPdy; > + > + /* 1. compute selector */ > + > + /* if (abs_p.x >= max(abs_p.y, abs_p.z)) ... */ > + ir_if *branch_x = > + new(mem_ctx) ir_if(gequal(swizzle_x(abs_p), > + max2(swizzle_y(abs_p), > swizzle_z(abs_p)))); > + > + /* Q = p.yzx; > + * dQdx = dPdx.yzx; > + * dQdy = dPdy.yzx; > + */ > + int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0); > + THEN(branch_x, assign(Q, swizzle(CLONE(ir->coordinate), yzx, 3))); > + THEN(branch_x, assign(dQdx, swizzle(CLONE(dPdx), yzx, 3))); > + THEN(branch_x, assign(dQdy, swizzle(CLONE(dPdy), yzx, 3))); > + EMIT(branch_x); > + > + /* if (abs_p.y >= max(abs_p.x, abs_p.z)) */ > + ir_if *branch_y = > + new(mem_ctx) ir_if(gequal(swizzle_y(abs_p), > + max2(swizzle_x(abs_p), > swizzle_z(abs_p)))); > + > + /* Q = p.xzy; > + * dQdx = dPdx.xzy; > + * dQdy = dPdy.xzy; > + */ > + int xzy = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Y, 0); > + THEN(branch_y, assign(Q, swizzle(CLONE(ir->coordinate), xzy, 3))); > + THEN(branch_y, assign(dQdx, swizzle(CLONE(dPdx), xzy, 3))); > + THEN(branch_y, assign(dQdy, swizzle(CLONE(dPdy), xzy, 3))); > + EMIT(branch_y); > + > + /* if (abs_p.z >= max(abs_p.x, abs_p.y)) */ > + ir_if *branch_z = > + new(mem_ctx) ir_if(gequal(swizzle_z(abs_p), > + max2(swizzle_x(abs_p), swizzle_y(abs_p)))); > + > + /* Q = p; > + * dQdx = dPdx; > + * dQdy = dPdy; > + */ > + THEN(branch_z, assign(Q, swizzle_for_size(CLONE(ir->coordinate), 3))); > + THEN(branch_z, assign(dQdx, CLONE(dPdx))); > + THEN(branch_z, assign(dQdy, CLONE(dPdy))); > + EMIT(branch_z); > + > + /* 2. quotient rule */ > + ir_variable *recip = temp(mem_ctx, glsl_type::float_type, "recip"); > + EMIT(assign(recip, div(new(mem_ctx) ir_constant(1.0f), swizzle_z(Q)))); > + > + ir_variable *dx = temp(mem_ctx, glsl_type::vec2_type, "dx"); > + ir_variable *dy = temp(mem_ctx, glsl_type::vec2_type, "dy"); > + > + /* dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) ); > + * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) ); > + */ Maybe update the comment above so it reflects the change you just added in this version: /* dx = recip * ( dQdx.xy - (Q.xy * recip) * dQdx.z) ); * dy = recip * ( dQdy.xy - (Q.xy * recip) * dQdy.z) ); */ > + ir_variable *tmp = temp(mem_ctx, glsl_type::vec2_type, "tmp"); > + EMIT(assign(tmp, mul(swizzle_xy(Q), recip))); > + EMIT(assign(dx, mul(recip, sub(swizzle_xy(dQdx), > + mul(tmp, swizzle_z(dQdx)))))); > + EMIT(assign(dy, mul(recip, sub(swizzle_xy(dQdy), > + mul(tmp, swizzle_z(dQdy)))))); > + > + /* M = max(dot(dx, dx), dot(dy, dy)); */ > + ir_variable *M = temp(mem_ctx, glsl_type::float_type, "M"); > + EMIT(assign(M, max2(dot(dx, dx), dot(dy, dy)))); > + > + /* size has textureSize() of LOD 0 */ > + ir_variable *L = temp(mem_ctx, glsl_type::float_type, "L"); > + EMIT(assign(L, swizzle_x(size))); > + > + ir_variable *result = temp(mem_ctx, glsl_type::float_type, "result"); > + > + /* result = -1.0 + 0.5 * log2(L * L * M); */ > + EMIT(assign(result, > + add(new(mem_ctx)ir_constant(-1.0f), > + mul(new(mem_ctx)ir_constant(0.5f), > + expr(ir_unop_log2, mul(mul(L, L), M)))))); > + > + /* 3. final assignment of parameters to textureLod call */ > + ir->lod_info.lod = new (mem_ctx) ir_dereference_variable(result); > + > +#undef THEN > +#undef EMIT > + > } else { > + /* Calculate rho from equation 3.20 of the GL 3.0 specification. */ > + ir_rvalue *rho; > + if (dPdx->type->is_scalar()) { > + rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx), > + expr(ir_unop_abs, dPdy)); > + } else { > + rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)), > + expr(ir_unop_sqrt, dot(dPdy, dPdy))); > + } > + > + /* lambda_base = log2(rho). We're ignoring GL state biases for now. */ > ir->lod_info.lod = expr(ir_unop_log2, rho); > } Reviewed-by: Iago Toral Quiroga <ito...@igalia.com> It would be nice if you could also add a reference to any spec/document where the underlying Math is explained (if there is a specific source material that Kevin used for it). If there is such thing, I would add that in the comment where you enumerate the steps, right before Step 1. Iago _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev