glsl: Use a separate div_to_mul_rcp lowering flag for integers.
Using multiply and reciprocal for integer division involves potentially lossy floating point conversions. This is okay for older GPUs that represent integers as floating point, but undesirable for GPUs with native integer division instructions.

TGSI, for example, has UDIV/IDIV instructions for integer division, so it makes sense to handle this directly. Likewise for i965.

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Signed-off-by: Bryan Cain <bryancain3@gmail.com>
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:

committed by
Kenneth Graunke

parent
87679e2ea1
commit
478034f34a
@@ -35,6 +35,7 @@
|
|||||||
#define POW_TO_EXP2 0x08
|
#define POW_TO_EXP2 0x08
|
||||||
#define LOG_TO_LOG2 0x10
|
#define LOG_TO_LOG2 0x10
|
||||||
#define MOD_TO_FRACT 0x20
|
#define MOD_TO_FRACT 0x20
|
||||||
|
#define INT_DIV_TO_MUL_RCP 0x40
|
||||||
|
|
||||||
bool do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iterations);
|
bool do_common_optimization(exec_list *ir, bool linked, unsigned max_unroll_iterations);
|
||||||
|
|
||||||
|
@@ -32,6 +32,7 @@
|
|||||||
* Currently supported transformations:
|
* Currently supported transformations:
|
||||||
* - SUB_TO_ADD_NEG
|
* - SUB_TO_ADD_NEG
|
||||||
* - DIV_TO_MUL_RCP
|
* - DIV_TO_MUL_RCP
|
||||||
|
* - INT_DIV_TO_MUL_RCP
|
||||||
* - EXP_TO_EXP2
|
* - EXP_TO_EXP2
|
||||||
* - POW_TO_EXP2
|
* - POW_TO_EXP2
|
||||||
* - LOG_TO_LOG2
|
* - LOG_TO_LOG2
|
||||||
@@ -47,8 +48,8 @@
|
|||||||
* want to recognize add(op0, neg(op1)) or the other way around to
|
* want to recognize add(op0, neg(op1)) or the other way around to
|
||||||
* produce a subtract anyway.
|
* produce a subtract anyway.
|
||||||
*
|
*
|
||||||
* DIV_TO_MUL_RCP:
|
* DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
|
||||||
* ---------------
|
* --------------------------------------
|
||||||
* Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
|
* Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
|
||||||
*
|
*
|
||||||
* Many GPUs don't have a divide instruction (945 and 965 included),
|
* Many GPUs don't have a divide instruction (945 and 965 included),
|
||||||
@@ -56,6 +57,10 @@
|
|||||||
* reciprocal. By breaking the operation down, constant reciprocals
|
* reciprocal. By breaking the operation down, constant reciprocals
|
||||||
* can get constant folded.
|
* can get constant folded.
|
||||||
*
|
*
|
||||||
|
* DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
|
||||||
|
* handles the integer case, converting to and from floating point so that
|
||||||
|
* RCP is possible.
|
||||||
|
*
|
||||||
* EXP_TO_EXP2 and LOG_TO_LOG2:
|
* EXP_TO_EXP2 and LOG_TO_LOG2:
|
||||||
* ----------------------------
|
* ----------------------------
|
||||||
* Many GPUs don't have a base e log or exponent instruction, but they
|
* Many GPUs don't have a base e log or exponent instruction, but they
|
||||||
@@ -95,6 +100,7 @@ private:
|
|||||||
|
|
||||||
void sub_to_add_neg(ir_expression *);
|
void sub_to_add_neg(ir_expression *);
|
||||||
void div_to_mul_rcp(ir_expression *);
|
void div_to_mul_rcp(ir_expression *);
|
||||||
|
void int_div_to_mul_rcp(ir_expression *);
|
||||||
void mod_to_fract(ir_expression *);
|
void mod_to_fract(ir_expression *);
|
||||||
void exp_to_exp2(ir_expression *);
|
void exp_to_exp2(ir_expression *);
|
||||||
void pow_to_exp2(ir_expression *);
|
void pow_to_exp2(ir_expression *);
|
||||||
@@ -127,18 +133,26 @@ lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
|
|||||||
void
|
void
|
||||||
lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
|
lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
|
||||||
{
|
{
|
||||||
if (!ir->operands[1]->type->is_integer()) {
|
assert(ir->operands[1]->type->is_float());
|
||||||
|
|
||||||
/* New expression for the 1.0 / op1 */
|
/* New expression for the 1.0 / op1 */
|
||||||
ir_rvalue *expr;
|
ir_rvalue *expr;
|
||||||
expr = new(ir) ir_expression(ir_unop_rcp,
|
expr = new(ir) ir_expression(ir_unop_rcp,
|
||||||
ir->operands[1]->type,
|
ir->operands[1]->type,
|
||||||
ir->operands[1],
|
ir->operands[1]);
|
||||||
NULL);
|
|
||||||
|
|
||||||
/* op0 / op1 -> op0 * (1.0 / op1) */
|
/* op0 / op1 -> op0 * (1.0 / op1) */
|
||||||
ir->operation = ir_binop_mul;
|
ir->operation = ir_binop_mul;
|
||||||
ir->operands[1] = expr;
|
ir->operands[1] = expr;
|
||||||
} else {
|
|
||||||
|
this->progress = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir)
|
||||||
|
{
|
||||||
|
assert(ir->operands[1]->type->is_integer());
|
||||||
|
|
||||||
/* Be careful with integer division -- we need to do it as a
|
/* Be careful with integer division -- we need to do it as a
|
||||||
* float and re-truncate, since rcp(n > 1) of an integer would
|
* float and re-truncate, since rcp(n > 1) of an integer would
|
||||||
* just be 0.
|
* just be 0.
|
||||||
@@ -180,7 +194,6 @@ lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
|
|||||||
ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0);
|
ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0);
|
||||||
}
|
}
|
||||||
ir->operands[1] = NULL;
|
ir->operands[1] = NULL;
|
||||||
}
|
|
||||||
|
|
||||||
this->progress = true;
|
this->progress = true;
|
||||||
}
|
}
|
||||||
@@ -265,7 +278,9 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case ir_binop_div:
|
case ir_binop_div:
|
||||||
if (lowering(DIV_TO_MUL_RCP))
|
if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
|
||||||
|
int_div_to_mul_rcp(ir);
|
||||||
|
else if (ir->operands[1]->type->is_float() && lowering(DIV_TO_MUL_RCP))
|
||||||
div_to_mul_rcp(ir);
|
div_to_mul_rcp(ir);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@@ -100,6 +100,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||||||
lower_instructions(shader->ir,
|
lower_instructions(shader->ir,
|
||||||
MOD_TO_FRACT |
|
MOD_TO_FRACT |
|
||||||
DIV_TO_MUL_RCP |
|
DIV_TO_MUL_RCP |
|
||||||
|
INT_DIV_TO_MUL_RCP |
|
||||||
SUB_TO_ADD_NEG |
|
SUB_TO_ADD_NEG |
|
||||||
EXP_TO_EXP2 |
|
EXP_TO_EXP2 |
|
||||||
LOG_TO_LOG2);
|
LOG_TO_LOG2);
|
||||||
|
@@ -3232,7 +3232,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||||||
/* Lowering */
|
/* Lowering */
|
||||||
do_mat_op_to_vec(ir);
|
do_mat_op_to_vec(ir);
|
||||||
lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
|
lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
|
||||||
| LOG_TO_LOG2
|
| LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
|
||||||
| ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
|
| ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
|
||||||
|
|
||||||
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
|
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
|
||||||
|
@@ -4982,7 +4982,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
|
|||||||
/* Lowering */
|
/* Lowering */
|
||||||
do_mat_op_to_vec(ir);
|
do_mat_op_to_vec(ir);
|
||||||
lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
|
lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
|
||||||
| LOG_TO_LOG2
|
| LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
|
||||||
| ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
|
| ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
|
||||||
|
|
||||||
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
|
progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
|
||||||
|
Reference in New Issue
Block a user