From: Nicolai Hähnle <nicolai.haeh...@amd.com> Reviewed-by: Marek Olšák <marek.ol...@amd.com> Reviewed-by: Iago Toral Quiroga <ito...@igalia.com> --- src/compiler/glsl/ir_optimization.h | 4 +++- src/compiler/glsl/lower_instructions.cpp | 19 +++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-)
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 0d6c4e6..01e5270 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -23,39 +23,41 @@ /** * \file ir_optimization.h * * Prototypes for optimization passes to be called by the compiler and drivers. */ /* Operations for lower_instructions() */ #define SUB_TO_ADD_NEG 0x01 -#define DIV_TO_MUL_RCP 0x02 +#define FDIV_TO_MUL_RCP 0x02 #define EXP_TO_EXP2 0x04 #define POW_TO_EXP2 0x08 #define LOG_TO_LOG2 0x10 #define MOD_TO_FLOOR 0x20 #define INT_DIV_TO_MUL_RCP 0x40 #define LDEXP_TO_ARITH 0x80 #define CARRY_TO_ARITH 0x100 #define BORROW_TO_ARITH 0x200 #define SAT_TO_CLAMP 0x400 #define DOPS_TO_DFRAC 0x800 #define DFREXP_DLDEXP_TO_ARITH 0x1000 #define BIT_COUNT_TO_MATH 0x02000 #define EXTRACT_TO_SHIFTS 0x04000 #define INSERT_TO_SHIFTS 0x08000 #define REVERSE_TO_SHIFTS 0x10000 #define FIND_LSB_TO_FLOAT_CAST 0x20000 #define FIND_MSB_TO_FLOAT_CAST 0x40000 #define IMUL_HIGH_TO_MUL 0x80000 +#define DDIV_TO_MUL_RCP 0x100000 +#define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP) /** * \see class lower_packing_builtins_visitor */ enum lower_packing_builtins_op { LOWER_PACK_UNPACK_NONE = 0x0000, LOWER_PACK_SNORM_2x16 = 0x0001, LOWER_UNPACK_SNORM_2x16 = 0x0002, diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp index 9fc83d1..729cb13 100644 --- a/src/compiler/glsl/lower_instructions.cpp +++ b/src/compiler/glsl/lower_instructions.cpp @@ -47,32 +47,34 @@ * SUB_TO_ADD_NEG: * --------------- * Breaks an ir_binop_sub expression down to add(op0, neg(op1)) * * This simplifies expression reassociation, and for many backends * there is no subtract operation separate from adding the negation. * For backends with native subtract operations, they will probably * want to recognize add(op0, neg(op1)) or the other way around to * produce a subtract anyway. * - * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP: - * -------------------------------------- + * FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP: + * --------------------------------------------------------- * Breaks an ir_binop_div expression down to op0 * (rcp(op1)). * * Many GPUs don't have a divide instruction (945 and 965 included), * but they do have an RCP instruction to compute an approximate * reciprocal. By breaking the operation down, constant reciprocals * can get constant folded. * - * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP - * handles the integer case, converting to and from floating point so that - * RCP is possible. + * FDIV_TO_MUL_RCP only lowers single-precision floating point division; + * DDIV_TO_MUL_RCP only lowers double-precision floating point division. + * DIV_TO_MUL_RCP is a convenience macro that sets both flags. + * INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating + * point so that RCP is possible. * * EXP_TO_EXP2 and LOG_TO_LOG2: * ---------------------------- * Many GPUs don't have a base e log or exponent instruction, but they * do have base 2 versions, so this pass converts exp and log to exp2 * and log2 operations. * * POW_TO_EXP2: * ----------- * Many older GPUs don't have an x**y instruction. For these GPUs, convert @@ -319,21 +321,22 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir) this->base_ir->insert_before(assign_y); ir_expression *const div_expr = new(ir) ir_expression(ir_binop_div, x->type, new(ir) ir_dereference_variable(x), new(ir) ir_dereference_variable(y)); /* Don't generate new IR that would need to be lowered in an additional * pass. */ - if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double())) + if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) || + (lowering(DDIV_TO_MUL_RCP) && ir->type->is_double())) div_to_mul_rcp(div_expr); ir_expression *const floor_expr = new(ir) ir_expression(ir_unop_floor, x->type, div_expr); if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) dfloor_to_dfrac(floor_expr); ir_expression *const mul_expr = new(ir) ir_expression(ir_binop_mul, @@ -1592,22 +1595,22 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) double_lrp(ir); break; case ir_binop_sub: if (lowering(SUB_TO_ADD_NEG)) sub_to_add_neg(ir); break; case ir_binop_div: if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP)) int_div_to_mul_rcp(ir); - else if ((ir->operands[1]->type->is_float() || - ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP)) + else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) || + (ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP))) div_to_mul_rcp(ir); break; case ir_unop_exp: if (lowering(EXP_TO_EXP2)) exp_to_exp2(ir); break; case ir_unop_log: if (lowering(LOG_TO_LOG2)) -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev