v3d: Fix copy-propagation of input unpacks.
I had a single function for "does this do float input unpacking" with two major flaws: it was missing the most common thing to try to copy propagate an f32 input unpack to (the VFPACK to an FP16 render target) along with several other ALU ops, and it would also try to propagate an f32 unpack into a VFMUL, which only does f16 unpacks. Results: instructions in affected programs: 659232 -> 655895 (-0.51%); uniforms in affected programs: 132613 -> 135336 (+2.05%); and a couple of programs increase their thread counts. The uniforms hit appears to be caused by a pattern in generated code of doing (-a >= a) comparisons, which, when a is abs(b), can result in the abs instruction being copy propagated once but not fully DCEed.
This commit is contained in:
@@ -787,7 +787,6 @@ bool vir_is_raw_mov(struct qinst *inst);
|
|||||||
bool vir_is_tex(struct qinst *inst);
|
bool vir_is_tex(struct qinst *inst);
|
||||||
bool vir_is_add(struct qinst *inst);
|
bool vir_is_add(struct qinst *inst);
|
||||||
bool vir_is_mul(struct qinst *inst);
|
bool vir_is_mul(struct qinst *inst);
|
||||||
bool vir_is_float_input(struct qinst *inst);
|
|
||||||
bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
|
bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||||
bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
|
bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||||
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
|
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
|
||||||
|
@@ -132,38 +132,6 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
|
||||||
vir_is_float_input(struct qinst *inst)
|
|
||||||
{
|
|
||||||
/* XXX: More instrs */
|
|
||||||
switch (inst->qpu.type) {
|
|
||||||
case V3D_QPU_INSTR_TYPE_BRANCH:
|
|
||||||
return false;
|
|
||||||
case V3D_QPU_INSTR_TYPE_ALU:
|
|
||||||
switch (inst->qpu.alu.add.op) {
|
|
||||||
case V3D_QPU_A_FADD:
|
|
||||||
case V3D_QPU_A_FSUB:
|
|
||||||
case V3D_QPU_A_FMIN:
|
|
||||||
case V3D_QPU_A_FMAX:
|
|
||||||
case V3D_QPU_A_FTOIN:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (inst->qpu.alu.mul.op) {
|
|
||||||
case V3D_QPU_M_FMOV:
|
|
||||||
case V3D_QPU_M_VFMUL:
|
|
||||||
case V3D_QPU_M_FMUL:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
vir_is_raw_mov(struct qinst *inst)
|
vir_is_raw_mov(struct qinst *inst)
|
||||||
{
|
{
|
||||||
|
@@ -151,13 +151,36 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
|
|||||||
* would be the same between the two
|
* would be the same between the two
|
||||||
* instructions.
|
* instructions.
|
||||||
*/
|
*/
|
||||||
if (vir_is_float_input(inst) !=
|
if (v3d_qpu_unpacks_f32(&inst->qpu) !=
|
||||||
vir_is_float_input(mov)) {
|
v3d_qpu_unpacks_f32(&mov->qpu) ||
|
||||||
|
v3d_qpu_unpacks_f16(&inst->qpu) !=
|
||||||
|
v3d_qpu_unpacks_f16(&mov->qpu)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* No composing the unpacks. */
|
/* No composing the unpacks. */
|
||||||
if (vir_has_unpack(inst, i))
|
if (vir_has_unpack(inst, i))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
/* these ops can't represent abs. */
|
||||||
|
if (mov->qpu.alu.mul.a_unpack == V3D_QPU_UNPACK_ABS) {
|
||||||
|
switch (inst->qpu.alu.add.op) {
|
||||||
|
case V3D_QPU_A_VFPACK:
|
||||||
|
case V3D_QPU_A_FROUND:
|
||||||
|
case V3D_QPU_A_FTRUNC:
|
||||||
|
case V3D_QPU_A_FFLOOR:
|
||||||
|
case V3D_QPU_A_FCEIL:
|
||||||
|
case V3D_QPU_A_FDX:
|
||||||
|
case V3D_QPU_A_FDY:
|
||||||
|
case V3D_QPU_A_FTOIN:
|
||||||
|
case V3D_QPU_A_FTOIZ:
|
||||||
|
case V3D_QPU_A_FTOUZ:
|
||||||
|
case V3D_QPU_A_FTOC:
|
||||||
|
continue;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (debug) {
|
if (debug) {
|
||||||
|
@@ -867,3 +867,70 @@ v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
|
|||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
|
||||||
|
{
|
||||||
|
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
switch (inst->alu.add.op) {
|
||||||
|
case V3D_QPU_A_FADD:
|
||||||
|
case V3D_QPU_A_FADDNF:
|
||||||
|
case V3D_QPU_A_FSUB:
|
||||||
|
case V3D_QPU_A_FMIN:
|
||||||
|
case V3D_QPU_A_FMAX:
|
||||||
|
case V3D_QPU_A_FCMP:
|
||||||
|
case V3D_QPU_A_FROUND:
|
||||||
|
case V3D_QPU_A_FTRUNC:
|
||||||
|
case V3D_QPU_A_FFLOOR:
|
||||||
|
case V3D_QPU_A_FCEIL:
|
||||||
|
case V3D_QPU_A_FDX:
|
||||||
|
case V3D_QPU_A_FDY:
|
||||||
|
case V3D_QPU_A_FTOIN:
|
||||||
|
case V3D_QPU_A_FTOIZ:
|
||||||
|
case V3D_QPU_A_FTOUZ:
|
||||||
|
case V3D_QPU_A_FTOC:
|
||||||
|
case V3D_QPU_A_VFPACK:
|
||||||
|
return true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (inst->alu.mul.op) {
|
||||||
|
case V3D_QPU_M_FMOV:
|
||||||
|
case V3D_QPU_M_FMUL:
|
||||||
|
return true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
|
||||||
|
{
|
||||||
|
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
switch (inst->alu.add.op) {
|
||||||
|
case V3D_QPU_A_VFMIN:
|
||||||
|
case V3D_QPU_A_VFMAX:
|
||||||
|
return true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (inst->alu.mul.op) {
|
||||||
|
case V3D_QPU_M_VFMUL:
|
||||||
|
return true;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
@@ -464,5 +464,7 @@ bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
|||||||
bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||||
bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
|
bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
|
||||||
const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
|
const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
|
||||||
|
bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||||
|
bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user