nir/lower_double_ops: lower ceil()

At least i965 hardware does not have native support for ceil on doubles.

v2 (Sam):
   - Improve the lowering pass to remove one bcsel (Jason).

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
Iago Toral Quiroga
2016-01-04 16:10:11 +01:00
committed by Samuel Iglesias Gonsálvez
parent 29541ec531
commit 126a1ac03f
2 changed files with 24 additions and 0 deletions

View File

@@ -2419,6 +2419,7 @@ typedef enum {
nir_lower_drsq = (1 << 2), nir_lower_drsq = (1 << 2),
nir_lower_dtrunc = (1 << 3), nir_lower_dtrunc = (1 << 3),
nir_lower_dfloor = (1 << 4), nir_lower_dfloor = (1 << 4),
nir_lower_dceil = (1 << 5),
} nir_lower_doubles_options; } nir_lower_doubles_options;
void nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); void nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options);

View File

@@ -368,6 +368,21 @@ lower_floor(nir_builder *b, nir_ssa_def *src)
nir_fsub(b, tr, nir_imm_double(b, 1.0))); nir_fsub(b, tr, nir_imm_double(b, 1.0)));
} }
static nir_ssa_def *
lower_ceil(nir_builder *b, nir_ssa_def *src)
{
   /* Emit ceil(src) in terms of ftrunc:
    *
    *   src < 0            ->  trunc(src)
    *   src == trunc(src)  ->  trunc(src)      (same value as src)
    *   otherwise          ->  trunc(src) + 1
    *
    * The first two cases select the same value, so their conditions are
    * OR-ed together and a single bcsel picks the result.
    */
   nir_ssa_def *truncated = nir_ftrunc(b, src);
   nir_ssa_def *is_negative = nir_flt(b, src, nir_imm_double(b, 0.0));
   nir_ssa_def *is_integral = nir_feq(b, src, truncated);
   nir_ssa_def *keep_truncated = nir_ior(b, is_negative, is_integral);
   nir_ssa_def *next_integer =
      nir_fadd(b, truncated, nir_imm_double(b, 1.0));

   return nir_bcsel(b, keep_truncated, truncated, next_integer);
}
static void static void
lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options) lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
{ {
@@ -401,6 +416,11 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
return; return;
break; break;
case nir_op_fceil:
if (!(options & nir_lower_dceil))
return;
break;
default: default:
return; return;
} }
@@ -430,6 +450,9 @@ lower_doubles_instr(nir_alu_instr *instr, nir_lower_doubles_options options)
case nir_op_ffloor: case nir_op_ffloor:
result = lower_floor(&bld, src); result = lower_floor(&bld, src);
break; break;
case nir_op_fceil:
result = lower_ceil(&bld, src);
break;
default: default:
unreachable("unhandled opcode"); unreachable("unhandled opcode");
} }