nir/lower_double_ops: lower round_even()

At least i965 hardware does not have native support for round_even() on doubles.

Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
Iago Toral Quiroga
2016-01-05 10:32:49 +01:00
committed by Samuel Iglesias Gonsálvez
parent bf91df7f7f
commit 072613b3f3
2 changed files with 60 additions and 1 deletions

View File

@@ -2420,7 +2420,8 @@ typedef enum {
nir_lower_dtrunc = (1 << 3), nir_lower_dtrunc = (1 << 3),
nir_lower_dfloor = (1 << 4), nir_lower_dfloor = (1 << 4),
nir_lower_dceil = (1 << 5), nir_lower_dceil = (1 << 5),
nir_lower_dfract = (1 << 6) nir_lower_dfract = (1 << 6),
nir_lower_dround_even = (1 << 7)
} nir_lower_doubles_options; } nir_lower_doubles_options;
void nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); void nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options);

View File

@@ -389,6 +389,55 @@ lower_fract(nir_builder *b, nir_ssa_def *src)
return nir_fsub(b, src, nir_ffloor(b, src)); return nir_fsub(b, src, nir_ffloor(b, src));
} }
static nir_ssa_def *
lower_round_even(nir_builder *b, nir_ssa_def *src)
{
/* If fract(src) == 0.5, then we will have to decide the rounding direction.
* We will do this by computing the mod(abs(src), 2) and testing if it
* is < 1 or not.
*
* We compute mod(abs(src), 2) as:
* abs(src) - 2.0 * floor(abs(src) / 2.0)
*/
nir_ssa_def *two = nir_imm_double(b, 2.0);
nir_ssa_def *abs_src = nir_fabs(b, src);
nir_ssa_def *mod =
nir_fsub(b,
abs_src,
nir_fmul(b,
two,
nir_ffloor(b,
nir_fmul(b,
abs_src,
nir_imm_double(b, 0.5)))));
/*
* If fract(src) != 0.5, then we round as floor(src + 0.5)
*
* If fract(src) == 0.5, then we have to check the modulo:
*
* if it is < 1 we need a trunc operation so we get:
* 0.5 -> 0, -0.5 -> -0
* 2.5 -> 2, -2.5 -> -2
*
* otherwise we need to check if src >= 0, in which case we need to round
* upwards, or not, in which case we need to round downwards so we get:
* 1.5 -> 2, -1.5 -> -2
* 3.5 -> 4, -3.5 -> -4
*/
nir_ssa_def *fract = nir_ffract(b, src);
return nir_bcsel(b,
nir_fne(b, fract, nir_imm_double(b, 0.5)),
nir_ffloor(b, nir_fadd(b, src, nir_imm_double(b, 0.5))),
nir_bcsel(b,
nir_flt(b, mod, nir_imm_double(b, 1.0)),
nir_ftrunc(b, src),
nir_bcsel(b,
nir_fge(b, src, nir_imm_double(b, 0.0)),
nir_fadd(b, src, nir_imm_double(b, 0.5)),
nir_fsub(b, src, nir_imm_double(b, 0.5)))));
}
static void static void
lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
{ {
@@ -432,6 +481,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
return; return;
break; break;
case nir_op_fround_even:
if (!(options & nir_lower_dround_even))
return;
break;
default: default:
return; return;
} }
@@ -467,6 +521,10 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
case nir_op_ffract: case nir_op_ffract:
result = lower_fract(&bld, src); result = lower_fract(&bld, src);
break; break;
case nir_op_fround_even:
result = lower_round_even(&bld, src);
break;
default: default:
unreachable("unhandled opcode"); unreachable("unhandled opcode");
} }