intel/vec4: Try to emit a single load for multiple 3-src instruction operands
If a 3-source instruction uses immediate values 1.0 and -1.0, just load 1.0 into a register. Use the negation source modifier to get -1.0.

This has trivial impact now, but it prevents a few thousand regressions on vec4 platforms with "nir/algebraic: Recognize open-coded flrp(-1, 1, a) and flrp(1, -1, a)".

All Gen6 and Gen7 platforms had similar results. (Haswell shown)
total instructions in shared programs: 13487412 -> 13487406 (<.01%)
instructions in affected programs: 541 -> 535 (-1.11%)
helped: 6
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.36% max: 2.08% x̄: 1.65% x̃: 1.80%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -2.33% -0.97%
Instructions are helped.

total cycles in shared programs: 376402564 -> 376402500 (<.01%)
cycles in affected programs: 10348 -> 10284 (-0.62%)
helped: 10
HURT: 1
helped stats (abs) min: 2 max: 26 x̄: 7.00 x̃: 2
helped stats (rel) min: 0.13% max: 2.05% x̄: 0.89% x̃: 0.79%
HURT stats (abs) min: 6 max: 6 x̄: 6.00 x̃: 6
HURT stats (rel) min: 0.29% max: 0.29% x̄: 0.29% x̃: 0.29%
95% mean confidence interval for cycles value: -11.72 0.08
95% mean confidence interval for cycles %-change: -1.20% -0.36%
Inconclusive result (value mean confidence interval includes 0).

No shader-db changes on any other Intel platform.

Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
@@ -243,7 +243,7 @@ public:
|
||||
src_reg emit_uniformize(const src_reg &src);
|
||||
|
||||
/** Fix all float operands of a 3-source instruction. */
|
||||
void fix_float_operands(src_reg op[3]);
|
||||
void fix_float_operands(src_reg op[3], nir_alu_instr *instr);
|
||||
|
||||
src_reg fix_3src_operand(const src_reg &src);
|
||||
src_reg resolve_source_modifiers(const src_reg &src);
|
||||
|
@@ -1131,10 +1131,42 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op,
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::fix_float_operands(src_reg op[3])
|
||||
vec4_visitor::fix_float_operands(src_reg op[3], nir_alu_instr *instr)
|
||||
{
|
||||
bool fixed[3] = { false, false, false };
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
if (!nir_src_is_const(instr->src[i].src))
|
||||
continue;
|
||||
|
||||
for (unsigned j = i + 1; j < 3; j++) {
|
||||
if (fixed[j])
|
||||
continue;
|
||||
|
||||
if (!nir_src_is_const(instr->src[j].src))
|
||||
continue;
|
||||
|
||||
if (nir_alu_srcs_equal(instr, instr, i, j)) {
|
||||
if (!fixed[i])
|
||||
op[i] = fix_3src_operand(op[i]);
|
||||
|
||||
op[j] = op[i];
|
||||
|
||||
fixed[i] = true;
|
||||
fixed[j] = true;
|
||||
} else if (nir_alu_srcs_negative_equal(instr, instr, i, j)) {
|
||||
if (!fixed[i])
|
||||
op[i] = fix_3src_operand(op[i]);
|
||||
|
||||
op[j] = op[i];
|
||||
op[j].negate = !op[j].negate;
|
||||
|
||||
fixed[i] = true;
|
||||
fixed[j] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (!fixed[i])
|
||||
op[i] = fix_3src_operand(op[i]);
|
||||
@@ -1927,14 +1959,14 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
|
||||
inst = emit(ADD(dst, src_reg(mul_dst), op[2]));
|
||||
inst->saturate = instr->dest.saturate;
|
||||
} else {
|
||||
fix_float_operands(op);
|
||||
fix_float_operands(op, instr);
|
||||
inst = emit(MAD(dst, op[2], op[1], op[0]));
|
||||
inst->saturate = instr->dest.saturate;
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_op_flrp:
|
||||
fix_float_operands(op);
|
||||
fix_float_operands(op, instr);
|
||||
inst = emit(LRP(dst, op[2], op[1], op[0]));
|
||||
inst->saturate = instr->dest.saturate;
|
||||
break;
|
||||
|
Reference in New Issue
Block a user