broadcom/compiler: Skip bool_to_cond where possible
This change keeps track of when a boolean temp is loaded into the flags by a comparison instruction and uses that information to skip emitting instructions to set the flags in ntq_emit_bool_to_cond when the flags already have the right contents. total instructions in shared programs: 11116502 -> 11112225 (-0.04%) instructions in affected programs: 631691 -> 627414 (-0.68%) helped: 1591 HURT: 754 helped stats (abs) min: 1 max: 94 x̄: 4.14 x̃: 3 helped stats (rel) min: 0.11% max: 13.46% x̄: 2.10% x̃: 1.58% HURT stats (abs) min: 1 max: 19 x̄: 3.07 x̃: 2 HURT stats (rel) min: 0.13% max: 19.67% x̄: 1.88% x̃: 1.15% 95% mean confidence interval for instructions value: -2.02 -1.63 95% mean confidence interval for instructions %-change: -0.94% -0.71% Instructions are helped. total uniforms in shared programs: 3281555 -> 3281513 (<.01%) uniforms in affected programs: 1754 -> 1712 (-2.39%) helped: 10 HURT: 5 helped stats (abs) min: 1 max: 19 x̄: 7.90 x̃: 5 helped stats (rel) min: 0.56% max: 11.11% x̄: 7.37% x̃: 11.05% HURT stats (abs) min: 1 max: 15 x̄: 7.40 x̃: 3 HURT stats (rel) min: 0.64% max: 9.55% x̄: 5.31% x̃: 3.41% 95% mean confidence interval for uniforms value: -8.57 2.97 95% mean confidence interval for uniforms %-change: -7.35% 1.07% Inconclusive result (value mean confidence interval includes 0). total max-temps in shared programs: 1758419 -> 1758174 (-0.01%) max-temps in affected programs: 7006 -> 6761 (-3.50%) helped: 290 HURT: 14 helped stats (abs) min: 1 max: 8 x̄: 1.13 x̃: 1 helped stats (rel) min: 0.79% max: 22.86% x̄: 6.61% x̃: 4.88% HURT stats (abs) min: 1 max: 13 x̄: 6.00 x̃: 3 HURT stats (rel) min: 1.54% max: 54.17% x̄: 23.99% x̃: 9.12% 95% mean confidence interval for max-temps value: -1.03 -0.58 95% mean confidence interval for max-temps %-change: -6.24% -4.16% Max-temps are helped. 
total sfu-stalls in shared programs: 23676 -> 23610 (-0.28%) sfu-stalls in affected programs: 1578 -> 1512 (-4.18%) helped: 257 HURT: 252 helped stats (abs) min: 1 max: 3 x̄: 1.37 x̃: 1 helped stats (rel) min: 11.11% max: 100.00% x̄: 46.70% x̃: 40.00% HURT stats (abs) min: 1 max: 2 x̄: 1.14 x̃: 1 HURT stats (rel) min: 0.00% max: 200.00% x̄: 41.65% x̃: 25.00% 95% mean confidence interval for sfu-stalls value: -0.25 -0.01 95% mean confidence interval for sfu-stalls %-change: -8.24% 2.33% Inconclusive result (%-change mean confidence interval includes 0). total inst-and-stalls in shared programs: 11140178 -> 11135835 (-0.04%) inst-and-stalls in affected programs: 633972 -> 629629 (-0.69%) helped: 1581 HURT: 755 helped stats (abs) min: 1 max: 94 x̄: 4.26 x̃: 3 helped stats (rel) min: 0.11% max: 13.46% x̄: 2.12% x̃: 1.59% HURT stats (abs) min: 1 max: 17 x̄: 3.17 x̃: 2 HURT stats (rel) min: 0.05% max: 19.67% x̄: 1.93% x̃: 1.20% 95% mean confidence interval for inst-and-stalls value: -2.06 -1.66 95% mean confidence interval for inst-and-stalls %-change: -0.93% -0.70% Inst-and-stalls are helped. Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8933>
This commit is contained in:

committed by
Marge Bot

parent
8762f29e9c
commit
9909fe6bac
@@ -1137,6 +1137,11 @@ ntq_get_alu_parent(nir_src src)
|
||||
static enum v3d_qpu_cond
|
||||
ntq_emit_bool_to_cond(struct v3d_compile *c, nir_src src)
|
||||
{
|
||||
struct qreg qsrc = ntq_get_src(c, src, 0);
|
||||
/* skip if we already have src in the flags */
|
||||
if (qsrc.file == QFILE_TEMP && c->flags_temp == qsrc.index)
|
||||
return c->flags_cond;
|
||||
|
||||
nir_alu_instr *compare = ntq_get_alu_parent(src);
|
||||
if (!compare)
|
||||
goto out;
|
||||
@@ -1146,6 +1151,7 @@ ntq_emit_bool_to_cond(struct v3d_compile *c, nir_src src)
|
||||
return cond;
|
||||
|
||||
out:
|
||||
|
||||
vir_set_pf(c, vir_MOV_dest(c, vir_nop_reg(), ntq_get_src(c, src, 0)),
|
||||
V3D_QPU_PF_PUSHZ);
|
||||
return V3D_QPU_COND_IFNA;
|
||||
@@ -1294,6 +1300,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
|
||||
result = vir_MOV(c, vir_SEL(c, cond,
|
||||
vir_uniform_f(c, 1.0),
|
||||
vir_uniform_f(c, 0.0)));
|
||||
c->flags_temp = result.index;
|
||||
c->flags_cond = cond;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1315,6 +1323,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
|
||||
result = vir_MOV(c, vir_SEL(c, cond,
|
||||
vir_uniform_ui(c, ~0),
|
||||
vir_uniform_ui(c, 0)));
|
||||
c->flags_temp = result.index;
|
||||
c->flags_cond = cond;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1397,6 +1407,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
|
||||
result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
|
||||
vir_uniform_ui(c, ~0),
|
||||
vir_uniform_ui(c, 0)));
|
||||
c->flags_temp = result.index;
|
||||
c->flags_cond = V3D_QPU_COND_IFA;
|
||||
break;
|
||||
|
||||
case nir_op_pack_half_2x16_split:
|
||||
@@ -2672,10 +2684,12 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
|
||||
case nir_intrinsic_load_helper_invocation:
|
||||
vir_set_pf(c, vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ);
|
||||
ntq_store_dest(c, &instr->dest, 0,
|
||||
vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
|
||||
struct qreg qdest = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
|
||||
vir_uniform_ui(c, ~0),
|
||||
vir_uniform_ui(c, 0))));
|
||||
vir_uniform_ui(c, 0)));
|
||||
c->flags_temp = qdest.index;
|
||||
c->flags_cond = V3D_QPU_COND_IFA;
|
||||
ntq_store_dest(c, &instr->dest, 0, qdest);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_front_face:
|
||||
|
@@ -735,6 +735,13 @@ struct v3d_compile {
|
||||
struct qblock *cur_block;
|
||||
struct qblock *loop_cont_block;
|
||||
struct qblock *loop_break_block;
|
||||
/**
|
||||
* Which temp, if any, do we currently have in the flags?
|
||||
* This is set when processing a comparison instruction, and
|
||||
* reset to -1 by anything else that touches the flags.
|
||||
*/
|
||||
int32_t flags_temp;
|
||||
enum v3d_qpu_cond flags_cond;
|
||||
|
||||
uint64_t *qpu_insts;
|
||||
uint32_t qpu_inst_count;
|
||||
|
@@ -234,6 +234,7 @@ vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond)
|
||||
void
|
||||
vir_set_pf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_pf pf)
|
||||
{
|
||||
c->flags_temp = -1;
|
||||
if (vir_is_add(inst)) {
|
||||
inst->qpu.flags.apf = pf;
|
||||
} else {
|
||||
@@ -245,6 +246,7 @@ vir_set_pf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_pf pf)
|
||||
void
|
||||
vir_set_uf(struct v3d_compile *c, struct qinst *inst, enum v3d_qpu_uf uf)
|
||||
{
|
||||
c->flags_temp = -1;
|
||||
if (vir_is_add(inst)) {
|
||||
inst->qpu.flags.auf = uf;
|
||||
} else {
|
||||
@@ -542,6 +544,7 @@ vir_compile_init(const struct v3d_compiler *compiler,
|
||||
_mesa_key_pointer_equal);
|
||||
|
||||
c->tmu.outstanding_regs = _mesa_pointer_set_create(c);
|
||||
c->flags_temp = -1;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
Reference in New Issue
Block a user