Are there any objections to pushing this?

Bryan
On 08/31/2011 01:33 AM, Bryan Cain wrote: > With this patch, there are no piglit regressions on softpipe with native > integers enabled. Unlike my previous patch, this uses integer values of > ~0 and 0 for true and false, respectively, instead of the float values 1.0 > and 0.0. > --- > src/mesa/main/uniforms.c | 6 +- > src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 160 > ++++++++++++++++++++-------- > 2 files changed, 116 insertions(+), 50 deletions(-) > > diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c > index cda840f..fa96fd3 100644 > --- a/src/mesa/main/uniforms.c > +++ b/src/mesa/main/uniforms.c > @@ -777,12 +777,12 @@ set_program_uniform(struct gl_context *ctx, struct > gl_program *program, > if (isUniformBool) { > for (i = 0; i < elems; i++) { > if (basicType == GL_FLOAT) > - uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0; > + uniformVal[i].u = uniformVal[i].f != 0.0f ? ~0 : 0; > else > - uniformVal[i].b = uniformVal[i].u ? 1 : 0; > + uniformVal[i].u = uniformVal[i].u ? ~0 : 0; > > if (!ctx->Const.NativeIntegers) > - uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f; > + uniformVal[i].f = uniformVal[i].u ? 
1.0f : 0.0f; > } > } > } > diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > index 2266083..c8f790a 100644 > --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp > @@ -385,6 +385,8 @@ public: > void emit_scalar(ir_instruction *ir, unsigned op, > st_dst_reg dst, st_src_reg src0, st_src_reg src1); > > + void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst); > + > void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); > > void emit_scs(ir_instruction *ir, unsigned op, > @@ -562,7 +564,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned > op, > } > > this->instructions.push_tail(inst); > - > + > + if (native_integers) > + try_emit_float_set(ir, op, dst); > + > return inst; > } > > @@ -588,6 +593,25 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned > op) > return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); > } > > + /** > + * Emits the code to convert the result of float SET instructions to > integers. > + */ > +void > +glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op, > + st_dst_reg dst) > +{ > + if ((op == TGSI_OPCODE_SEQ || > + op == TGSI_OPCODE_SNE || > + op == TGSI_OPCODE_SGE || > + op == TGSI_OPCODE_SLT)) > + { > + st_src_reg src = st_src_reg(dst); > + src.negate = ~src.negate; > + dst.type = GLSL_TYPE_FLOAT; > + emit(ir, TGSI_OPCODE_F2I, dst, src); > + } > +} > + > /** > * Determines whether to use an integer, unsigned integer, or float opcode > * based on the operands and input opcode, then emits the result. > @@ -604,7 +628,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, > unsigned op, > if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) > type = GLSL_TYPE_FLOAT; > else if (native_integers) > - type = src0.type; > + type = src0.type == GLSL_TYPE_BOOL ? 
GLSL_TYPE_INT : src0.type; > > #define case4(c, f, i, u) \ > case TGSI_OPCODE_##c: \ > @@ -630,12 +654,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, > unsigned op, > case3(SGE, ISGE, USGE); > case3(SLT, ISLT, USLT); > > - case2iu(SHL, SHL); > case2iu(ISHR, USHR); > - case2iu(NOT, NOT); > - case2iu(AND, AND); > - case2iu(OR, OR); > - case2iu(XOR, XOR); > > default: break; > } > @@ -1389,7 +1408,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) > switch (ir->operation) { > case ir_unop_logic_not: > if (result_dst.type != GLSL_TYPE_FLOAT) > - emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], > st_src_reg_for_type(result_dst.type, 0)); > + emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); > else { > /* Previously 'SEQ dst, src, 0.0' was used for this. However, many > * older GPUs implement SEQ using multiple instructions (i915 uses > two > @@ -1489,10 +1508,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) > emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); > break; > case ir_binop_greater: > - emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]); > + emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]); > break; > case ir_binop_lequal: > - emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]); > + emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]); > break; > case ir_binop_gequal: > emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); > @@ -1605,41 +1624,52 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) > } > > case ir_binop_logic_xor: > - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); > + if (native_integers) > + emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); > + else > + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); > break; > > case ir_binop_logic_or: { > - /* After the addition, the value will be an integer on the > - * range [0,2]. Zero stays zero, and positive values become 1.0. 
> - */ > - glsl_to_tgsi_instruction *add = > - emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); > - if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && > - result_dst.type == GLSL_TYPE_FLOAT) { > - /* The clamping to [0,1] can be done for free in the fragment > - * shader with a saturate if floats are being used as boolean > values. > - */ > - add->saturate = true; > - } else if (result_dst.type == GLSL_TYPE_FLOAT) { > - /* Negating the result of the addition gives values on the range > - * [-2, 0]. Zero stays zero, and negative values become 1.0. This > - * is achieved using SLT. > + if (native_integers) { > + /* If integers are used as booleans, we can use an actual "or" > + * instruction. > */ > - st_src_reg slt_src = result_src; > - slt_src.negate = ~slt_src.negate; > - emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, > st_src_reg_for_float(0.0)); > + assert(native_integers); > + emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); > } else { > - /* Use an SNE on the result of the addition. Zero stays zero, > - * 1 stays 1, and 2 becomes 1. > + /* After the addition, the value will be an integer on the > + * range [0,2]. Zero stays zero, and positive values become 1.0. > */ > - emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, > st_src_reg_for_int(0)); > + glsl_to_tgsi_instruction *add = > + emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); > + if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { > + /* The clamping to [0,1] can be done for free in the fragment > + * shader with a saturate if floats are being used as boolean > values. > + */ > + add->saturate = true; > + } else { > + /* Negating the result of the addition gives values on the range > + * [-2, 0]. Zero stays zero, and negative values become 1.0. > This > + * is achieved using SLT. 
> + */ > + st_src_reg slt_src = result_src; > + slt_src.negate = ~slt_src.negate; > + emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, > st_src_reg_for_float(0.0)); > + } > } > break; > } > > case ir_binop_logic_and: > - /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us > "and". */ > - emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); > + /* If native integers are disabled, the bool args are stored as float > 0.0 > + * or 1.0, so "mul" gives us "and". If they're enabled, just use the > + * actual AND opcode. > + */ > + if (native_integers) > + emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); > + else > + emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); > break; > > case ir_binop_dot: > @@ -1662,18 +1692,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) > emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); > break; > case ir_unop_i2f: > - case ir_unop_b2f: > if (native_integers) { > emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]); > break; > } > + /* fallthrough to next case otherwise */ > + case ir_unop_b2f: > + if (native_integers) { > + emit(ir, TGSI_OPCODE_AND, result_dst, op[0], > st_src_reg_for_float(1.0)); > + break; > + } > + /* fallthrough to next case otherwise */ > case ir_unop_i2u: > case ir_unop_u2i: > /* Converting between signed and unsigned integers is a no-op. */ > - case ir_unop_b2i: > - /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). > */ > result_src = op[0]; > break; > + case ir_unop_b2i: > + if (native_integers) { > + /* Booleans are stored as integers using ~0 for true and 0 for > false. > + * GLSL requires that int(bool) return 1 for true and 0 for false. > + * This conversion is done with AND, but it could be done with NEG. > + */ > + emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); > + } else { > + /* Booleans and integers are both stored as floats when native > + * integers are disabled. 
> + */ > + result_src = op[0]; > + } > + break; > case ir_unop_f2i: > if (native_integers) > emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]); > @@ -1681,9 +1729,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) > emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); > break; > case ir_unop_f2b: > + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], > st_src_reg_for_float(0.0)); > + break; > case ir_unop_i2b: > - emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], > - st_src_reg_for_type(result_dst.type, 0)); > + if (native_integers) > + emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]); > + else > + emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], > st_src_reg_for_float(0.0)); > break; > case ir_unop_trunc: > emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); > @@ -1711,7 +1763,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) > break; > > case ir_unop_bit_not: > - if (glsl_version >= 130) { > + if (native_integers) { > emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]); > break; > } > @@ -1721,27 +1773,27 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) > break; > } > case ir_binop_lshift: > - if (glsl_version >= 130) { > + if (native_integers) { > emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]); > break; > } > case ir_binop_rshift: > - if (glsl_version >= 130) { > + if (native_integers) { > emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]); > break; > } > case ir_binop_bit_and: > - if (glsl_version >= 130) { > + if (native_integers) { > emit(ir, TGSI_OPCODE_AND, result_dst, op[0]); > break; > } > case ir_binop_bit_xor: > - if (glsl_version >= 130) { > + if (native_integers) { > emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]); > break; > } > case ir_binop_bit_or: > - if (glsl_version >= 130) { > + if (native_integers) { > emit(ir, TGSI_OPCODE_OR, result_dst, op[0]); > break; > } > @@ -2129,12 +2181,25 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) > > for (i = 0; i < type_size(ir->lhs->type); i++) { > st_src_reg l_src = st_src_reg(l); > + st_src_reg condition_temp = condition; > l_src.swizzle = 
swizzle_for_size(ir->lhs->type->vector_elements); > > + if (native_integers) { > + /* This is necessary because TGSI's CMP instruction expects the > + * condition to be a float, and we store booleans as integers. > + * If TGSI had a UCMP instruction or similar, this extra > + * instruction would not be necessary. > + */ > + condition_temp = get_temp(glsl_type::vec4_type); > + condition.negate = 0; > + emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition); > + condition_temp.swizzle = condition.swizzle; > + } > + > if (switch_order) { > - emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r); > + emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r); > } else { > - emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src); > + emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src); > } > > l.index++; > @@ -2154,6 +2219,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) > inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); > new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], > inst->src[2]); > new_inst->saturate = inst->saturate; > + inst->dead_mask = inst->dst.writemask; > } else { > for (i = 0; i < type_size(ir->lhs->type); i++) { > emit(ir, TGSI_OPCODE_MOV, l, r); _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev