v3d: Fold comparisons for IF conditions into the flags for the IF.

total instructions in shared programs: 6193810 -> 6192844 (-0.02%)
instructions in affected programs: 800373 -> 799407 (-0.12%)
This commit is contained in:
Eric Anholt
2018-12-28 16:31:07 -08:00
parent 078dc176bc
commit 5e9ee6e841
5 changed files with 57 additions and 12 deletions

View File

@@ -1736,19 +1736,33 @@ ntq_emit_if(struct v3d_compile *c, nir_if *if_stmt)
was_top_level = true; was_top_level = true;
} }
/* Set A for executing (execute == 0) and jumping (if->condition == /* Set up the flags for the IF condition (taking the THEN branch). */
* 0) channels, and then update execute flags for those to point to nir_alu_instr *if_condition_alu = ntq_get_alu_parent(if_stmt->condition);
* the ELSE block. enum v3d_qpu_cond cond;
* if (!if_condition_alu ||
* XXX perf: we could reuse ntq_emit_comparison() to generate our if !ntq_emit_comparison(c, if_condition_alu, &cond)) {
* condition, and the .uf field to ignore non-executing channels, to vir_PF(c, ntq_get_src(c, if_stmt->condition, 0),
* reduce the overhead of if statements. V3D_QPU_PF_PUSHZ);
cond = V3D_QPU_COND_IFNA;
}
/* Update the flags+cond to mean "Taking the ELSE branch (!cond) and
* was previously active (execute Z)" for updating the exec flags.
*/ */
vir_PF(c, vir_OR(c, if (was_top_level) {
c->execute, cond = v3d_qpu_cond_invert(cond);
ntq_get_src(c, if_stmt->condition, 0)), } else {
V3D_QPU_PF_PUSHZ); struct qinst *inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0),
vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute);
if (cond == V3D_QPU_COND_IFA) {
vir_set_uf(inst, V3D_QPU_UF_NORNZ);
} else {
vir_set_uf(inst, V3D_QPU_UF_ANDZ);
cond = V3D_QPU_COND_IFA;
}
}
vir_MOV_cond(c, cond,
c->execute, c->execute,
vir_uniform_ui(c, else_block->index)); vir_uniform_ui(c, else_block->index));

View File

@@ -751,6 +751,7 @@ struct qreg vir_emit_def(struct v3d_compile *c, struct qinst *inst);
struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst); struct qinst *vir_emit_nondef(struct v3d_compile *c, struct qinst *inst);
void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond); void vir_set_cond(struct qinst *inst, enum v3d_qpu_cond cond);
void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf); void vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf);
void vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf);
void vir_set_unpack(struct qinst *inst, int src, void vir_set_unpack(struct qinst *inst, int src,
enum v3d_qpu_input_unpack unpack); enum v3d_qpu_input_unpack unpack);

View File

@@ -291,6 +291,17 @@ vir_set_pf(struct qinst *inst, enum v3d_qpu_pf pf)
} }
} }
/**
 * Stores @uf in the flags field that matches the ALU of @inst.
 *
 * When vir_is_add(inst) holds, the value is written to
 * inst->qpu.flags.auf.  Otherwise the instruction is asserted to satisfy
 * vir_is_mul(inst) and the value is written to inst->qpu.flags.muf.
 */
void
vir_set_uf(struct qinst *inst, enum v3d_qpu_uf uf)
{
        /* Handle the non-add case first so the add path is the
         * straight-line fallthrough.
         */
        if (!vir_is_add(inst)) {
                assert(vir_is_mul(inst));
                inst->qpu.flags.muf = uf;
                return;
        }

        inst->qpu.flags.auf = uf;
}
#if 0 #if 0
uint8_t uint8_t
vir_channels_written(struct qinst *inst) vir_channels_written(struct qinst *inst)

View File

@@ -499,6 +499,23 @@ v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
return 0; return 0;
} }
/**
 * Returns the logical opposite of @cond.
 *
 * IFA and IFNA map to each other, as do IFB and IFNB.  Any other value is
 * treated as a caller error and reaches unreachable().
 */
enum v3d_qpu_cond
v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
{
        /* A-flag pair. */
        if (cond == V3D_QPU_COND_IFA) {
                return V3D_QPU_COND_IFNA;
        }
        if (cond == V3D_QPU_COND_IFNA) {
                return V3D_QPU_COND_IFA;
        }

        /* B-flag pair. */
        if (cond == V3D_QPU_COND_IFB) {
                return V3D_QPU_COND_IFNB;
        }
        if (cond == V3D_QPU_COND_IFNB) {
                return V3D_QPU_COND_IFB;
        }

        unreachable("Non-invertible cond");
}
bool bool
v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
{ {

View File

@@ -398,6 +398,8 @@ const char *v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack);
const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond); const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond);
const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign); const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign);
enum v3d_qpu_cond v3d_qpu_cond_invert(enum v3d_qpu_cond cond) ATTRIBUTE_CONST;
bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op); bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op);
bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op); bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op);
int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op); int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op);