glsl: Improve precision of mod(x,y)
Currently, Mesa uses the lowering pass MOD_TO_FRACT to implement mod(x,y) as y * fract(x/y). This implementation has a down side though: it introduces precision errors due to the fract() operation. Even worse, since the result of fract() is multiplied by y, the larger y gets the larger the precision error we produce, so for large enough numbers the precision loss is significant. Some examples on i965: Operation Precision error ----------------------------------------------------- mod(-1.951171875, 1.9980468750) 0.0000000447 mod(121.57, 13.29) 0.0000023842 mod(3769.12, 321.99) 0.0000762939 mod(3769.12, 1321.99) 0.0001220703 mod(-987654.125, 123456.984375) 0.0160663128 mod( 987654.125, 123456.984375) 0.0312500000 This patch replaces the current lowering pass with a different one (MOD_TO_FLOOR) that follows the recommended implementation in the GLSL man pages: mod(x,y) = x - y * floor(x/y) This implementation eliminates the precision errors at the expense of an additional add instruction on some systems. On systems that can do negate with multiply-add in a single operation this new implementation would come at no additional cost. v2 (Ian Romanick) - Do not clone operands because when they are expressions we would be duplicating them and that can lead to suboptimal code. Fixes the following 16 dEQP tests: dEQP-GLES3.functional.shaders.builtin_functions.precision.mod.mediump_* dEQP-GLES3.functional.shaders.builtin_functions.precision.mod.highp_* Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
This commit is contained in:
@@ -187,7 +187,7 @@ You may also need to update the backends if they will see the new expr type:
|
||||
|
||||
You can then use the new expression from builtins (if all backends
|
||||
would rather see it), or scan the IR and convert to use your new
|
||||
expression type (see ir_mod_to_fract, for example).
|
||||
expression type (see ir_mod_to_floor, for example).
|
||||
|
||||
Q: How is memory management handled in the compiler?
|
||||
|
||||
|
@@ -34,7 +34,7 @@
|
||||
#define EXP_TO_EXP2 0x04
|
||||
#define POW_TO_EXP2 0x08
|
||||
#define LOG_TO_LOG2 0x10
|
||||
#define MOD_TO_FRACT 0x20
|
||||
#define MOD_TO_FLOOR 0x20
|
||||
#define INT_DIV_TO_MUL_RCP 0x40
|
||||
#define BITFIELD_INSERT_TO_BFM_BFI 0x80
|
||||
#define LDEXP_TO_ARITH 0x100
|
||||
|
@@ -36,7 +36,7 @@
|
||||
* - EXP_TO_EXP2
|
||||
* - POW_TO_EXP2
|
||||
* - LOG_TO_LOG2
|
||||
* - MOD_TO_FRACT
|
||||
* - MOD_TO_FLOOR
|
||||
* - LDEXP_TO_ARITH
|
||||
* - BITFIELD_INSERT_TO_BFM_BFI
|
||||
* - CARRY_TO_ARITH
|
||||
@@ -77,14 +77,17 @@
|
||||
* Many older GPUs don't have an x**y instruction. For these GPUs, convert
|
||||
* x**y to 2**(y * log2(x)).
|
||||
*
|
||||
* MOD_TO_FRACT:
|
||||
* MOD_TO_FLOOR:
|
||||
* -------------
|
||||
* Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
|
||||
* Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1))
|
||||
*
|
||||
* Many GPUs don't have a MOD instruction (945 and 965 included), and
|
||||
* if we have to break it down like this anyway, it gives an
|
||||
* opportunity to do things like constant fold the (1.0 / op1) easily.
|
||||
*
|
||||
* Note: before we used to implement this as op1 * fract(op / op1) but this
|
||||
* implementation had significant precision errors.
|
||||
*
|
||||
* LDEXP_TO_ARITH:
|
||||
* -------------
|
||||
* Converts ir_binop_ldexp to arithmetic and bit operations.
|
||||
@@ -136,7 +139,7 @@ private:
|
||||
void sub_to_add_neg(ir_expression *);
|
||||
void div_to_mul_rcp(ir_expression *);
|
||||
void int_div_to_mul_rcp(ir_expression *);
|
||||
void mod_to_fract(ir_expression *);
|
||||
void mod_to_floor(ir_expression *);
|
||||
void exp_to_exp2(ir_expression *);
|
||||
void pow_to_exp2(ir_expression *);
|
||||
void log_to_log2(ir_expression *);
|
||||
@@ -276,22 +279,29 @@ lower_instructions_visitor::log_to_log2(ir_expression *ir)
|
||||
}
|
||||
|
||||
void
|
||||
lower_instructions_visitor::mod_to_fract(ir_expression *ir)
|
||||
lower_instructions_visitor::mod_to_floor(ir_expression *ir)
|
||||
{
|
||||
ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
|
||||
ir_var_temporary);
|
||||
this->base_ir->insert_before(temp);
|
||||
ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x",
|
||||
ir_var_temporary);
|
||||
ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y",
|
||||
ir_var_temporary);
|
||||
this->base_ir->insert_before(x);
|
||||
this->base_ir->insert_before(y);
|
||||
|
||||
ir_assignment *const assign =
|
||||
new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
|
||||
ir->operands[1], NULL);
|
||||
ir_assignment *const assign_x =
|
||||
new(ir) ir_assignment(new(ir) ir_dereference_variable(x),
|
||||
ir->operands[0], NULL);
|
||||
ir_assignment *const assign_y =
|
||||
new(ir) ir_assignment(new(ir) ir_dereference_variable(y),
|
||||
ir->operands[1], NULL);
|
||||
|
||||
this->base_ir->insert_before(assign);
|
||||
this->base_ir->insert_before(assign_x);
|
||||
this->base_ir->insert_before(assign_y);
|
||||
|
||||
ir_expression *const div_expr =
|
||||
new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
|
||||
ir->operands[0],
|
||||
new(ir) ir_dereference_variable(temp));
|
||||
new(ir) ir_expression(ir_binop_div, x->type,
|
||||
new(ir) ir_dereference_variable(x),
|
||||
new(ir) ir_dereference_variable(y));
|
||||
|
||||
/* Don't generate new IR that would need to be lowered in an additional
|
||||
* pass.
|
||||
@@ -299,14 +309,17 @@ lower_instructions_visitor::mod_to_fract(ir_expression *ir)
|
||||
if (lowering(DIV_TO_MUL_RCP))
|
||||
div_to_mul_rcp(div_expr);
|
||||
|
||||
ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
|
||||
ir->operands[0]->type,
|
||||
div_expr,
|
||||
NULL);
|
||||
ir_expression *const floor_expr =
|
||||
new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
|
||||
|
||||
ir->operation = ir_binop_mul;
|
||||
ir->operands[0] = new(ir) ir_dereference_variable(temp);
|
||||
ir->operands[1] = expr;
|
||||
ir_expression *const mul_expr =
|
||||
new(ir) ir_expression(ir_binop_mul,
|
||||
new(ir) ir_dereference_variable(y),
|
||||
floor_expr);
|
||||
|
||||
ir->operation = ir_binop_sub;
|
||||
ir->operands[0] = new(ir) ir_dereference_variable(x);
|
||||
ir->operands[1] = mul_expr;
|
||||
this->progress = true;
|
||||
}
|
||||
|
||||
@@ -535,8 +548,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
|
||||
break;
|
||||
|
||||
case ir_binop_mod:
|
||||
if (lowering(MOD_TO_FRACT) && ir->type->is_float())
|
||||
mod_to_fract(ir);
|
||||
if (lowering(MOD_TO_FLOOR) && ir->type->is_float())
|
||||
mod_to_floor(ir);
|
||||
break;
|
||||
|
||||
case ir_binop_pow:
|
||||
|
@@ -877,7 +877,7 @@ fs_visitor::visit(ir_expression *ir)
|
||||
break;
|
||||
}
|
||||
case ir_binop_mod:
|
||||
/* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
|
||||
/* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
|
||||
assert(ir->type->is_integer());
|
||||
emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
|
||||
break;
|
||||
|
@@ -157,7 +157,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
|
||||
? BITFIELD_INSERT_TO_BFM_BFI
|
||||
: 0;
|
||||
lower_instructions(shader->base.ir,
|
||||
MOD_TO_FRACT |
|
||||
MOD_TO_FLOOR |
|
||||
DIV_TO_MUL_RCP |
|
||||
SUB_TO_ADD_NEG |
|
||||
EXP_TO_EXP2 |
|
||||
|
@@ -1522,7 +1522,7 @@ vec4_visitor::visit(ir_expression *ir)
|
||||
break;
|
||||
}
|
||||
case ir_binop_mod:
|
||||
/* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
|
||||
/* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
|
||||
assert(ir->type->is_integer());
|
||||
emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
|
||||
break;
|
||||
|
@@ -1152,7 +1152,7 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
|
||||
assert(!"not reached: should be handled by ir_div_to_mul_rcp");
|
||||
break;
|
||||
case ir_binop_mod:
|
||||
/* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
|
||||
/* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
|
||||
assert(ir->type->is_integer());
|
||||
emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
|
||||
break;
|
||||
@@ -2943,7 +2943,7 @@ _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
|
||||
|
||||
/* Lowering */
|
||||
do_mat_op_to_vec(ir);
|
||||
lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
|
||||
lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2
|
||||
| LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
|
||||
| ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
|
||||
|
||||
|
@@ -5419,7 +5419,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
|
||||
lower_offset_arrays(ir);
|
||||
do_mat_op_to_vec(ir);
|
||||
lower_instructions(ir,
|
||||
MOD_TO_FRACT |
|
||||
MOD_TO_FLOOR |
|
||||
DIV_TO_MUL_RCP |
|
||||
EXP_TO_EXP2 |
|
||||
LOG_TO_LOG2 |
|
||||
|
Reference in New Issue
Block a user