v3d: Switch to using the new SFU instructions on V3D 4.x.
These instructions let us write directly to the phys regfile, instead of just R4. That lets us avoid moving out of R4 to avoid conflicting with other SFU results, and to avoid conflicting with thread switches.

There is still an extra instruction of latency, which is not represented in the scheduler at the moment. If you use the result before it's ready, the QPU will just stall, unlike the magic R4 mode where you'd read the previous value.

That means that the following shader-db results aren't quite representative (since we now cause some stalls instead of emitting nops), but they're impressive enough that I'm happy with the change.

total instructions in shared programs: 95669 -> 91275 (-4.59%)
instructions in affected programs: 82590 -> 78196 (-5.32%)
This commit is contained in:
@@ -935,6 +935,17 @@ vir_uniform(struct v3d_compile *c,
|
||||
return vir_reg(QFILE_UNIF, uniform);
|
||||
}
|
||||
|
||||
static bool
|
||||
vir_can_set_flags(struct v3d_compile *c, struct qinst *inst)
|
||||
{
|
||||
if (c->devinfo->ver >= 40 && (v3d_qpu_reads_vpm(&inst->qpu) ||
|
||||
v3d_qpu_uses_sfu(&inst->qpu))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
|
||||
{
|
||||
@@ -954,7 +965,8 @@ vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
|
||||
|
||||
if (src.file != QFILE_TEMP ||
|
||||
!c->defs[src.index] ||
|
||||
last_inst != c->defs[src.index]) {
|
||||
last_inst != c->defs[src.index] ||
|
||||
!vir_can_set_flags(c, last_inst)) {
|
||||
/* XXX: Make the MOV be the appropriate type */
|
||||
last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
|
||||
}
|
||||
|
Reference in New Issue
Block a user